#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This script automates symbol resolution for Breakpad minidumps
# under ideal circumstances. Specifically, it expects all the
# binaries to be in the same locations as when the minidump
# was taken. This is often true for minidumps on a developer
# workstation or at the end of an Impala test job. It finds Breakpad
# using environment variables from the Impala dev environment,
# so it must run inside the Impala dev environment.
# TODO: It may be possible to extend this to Docker images.
#
# Within this simple context, this script aims for complete
# symbol resolution. It uses Breakpad's minidump_dump utility
# to dump the minidump, then it parses the list of libraries
# that were used by the binary. It gets the symbols for all
# those libraries and resolves the minidump.
#
# Usage: resolve_minidumps.py --minidump_file [file] --output_file [file]
#        (optional -v or --verbose for more output)

import errno
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback

from argparse import ArgumentParser


class ModuleInfo:
  def __init__(self, code_file, code_id, debug_file, debug_id):
    self.code_file = code_file
    self.code_id = code_id
    self.debug_file = debug_file
    self.debug_id = debug_id


def read_module_info(minidump_dump_contents):
  """Read the module information out of the minidump_dump raw contents.

  This expects 'minidump_dump_contents' to be the output of minidump_dump for the
  minidump, split by newlines. It returns a list of ModuleInfo objects, or None if
  parsing fails.
  """
  # Find the module_count
  module_count = None
  for line in minidump_dump_contents:
    if line.strip().startswith("module_count"):
      module_count = int(line.split("=")[1].strip())
      break
  if module_count is None:
    logging.error("Failed to parse dump output, could not find module_count")
    return None

  # The minidump has an MDRawModule per module, and it will have the same number of
  # MDRawModule dumps as module_count.
  module_boundaries = []
  for idx, line in enumerate(minidump_dump_contents):
    if line.startswith("MDRawModule"):
      module_boundaries.append(idx)

  if len(module_boundaries) != module_count:
    logging.error("Failed to parse modules, mismatch in module count "
                  "({0} != {1})".format(len(module_boundaries), module_count))
    return None

  # Add one more entry to module_boundaries that is the end of the file.
  # That makes this more of a list of boundaries than a list of start locations.
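  # For example (illustrative numbers): with module_count == 2 and MDRawModule headers
  # found at lines 10 and 50 of a 100-line dump, module_boundaries becomes
  # [10, 50, 100], so module i spans lines [module_boundaries[i], module_boundaries[i + 1]).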
  module_boundaries.append(len(minidump_dump_contents))

  modules = []
  for module_idx in range(module_count):
    module_start = module_boundaries[module_idx]
    module_end = module_boundaries[module_idx + 1]
    # Find the code/debug file and identifier fields for this module.
    code_file = None
    code_identifier = None
    debug_file = None
    debug_identifier = None
    for line in minidump_dump_contents[module_start:module_end]:
      if line.find("code_file") != -1:
        code_file = line.split("=")[1].strip().strip('"')
      elif line.find("code_identifier") != -1:
        code_identifier = line.split("=")[1].strip().strip('"')
      elif line.find("debug_file") != -1:
        debug_file = line.split("=")[1].strip().strip('"')
      elif line.find("debug_identifier") != -1:
        debug_identifier = line.split("=")[1].strip().strip('"')

    # Important: it is ok for the fields to be the zero-length string. We just care
    # that they are non-None (i.e. the loop above encountered them and parsed a value).
    if code_file is None or code_identifier is None or debug_file is None or \
       debug_identifier is None:
      logging.error("Failed to parse dump output, missing fields for MDRawModule "
                    "{0}".format(module_idx))
      return None

    # Jars and other files show up in this list, but they have code identifiers or
    # debug identifiers that are all zeros. Skip those, as there are no symbols to find.
    if re.fullmatch("[0]+", code_identifier) or re.fullmatch("[0]+", debug_identifier):
      continue

    # Skip cases where the code identifier or debug identifier are empty.
    if len(code_identifier) == 0 or len(debug_identifier) == 0:
      continue

    # linux-gate.so is a special case, and it is not an actual file on disk.
    if code_file.startswith("linux-gate.so"):
      continue

    modules.append(ModuleInfo(code_file, code_identifier, debug_file, debug_identifier))

  return modules


def filter_shared_library_modules(module_list, lib_allow_list):
  """Filter the list of modules by eliminating any shared libraries that do not match
  one of the prefixes in the allow list. This keeps all non-shared-library modules
  (such as the main binary).
  """
  filtered_module_list = []
  for module in module_list:
    code_file_basename = os.path.basename(module.code_file)
    # Keep anything that is not a shared library (e.g. the main binary).
    if ".so" not in code_file_basename:
      filtered_module_list.append(module)
      continue

    # Only keep shared libraries that match an entry on the allow list.
    for allow_lib in lib_allow_list:
      if code_file_basename.startswith(allow_lib):
        filtered_module_list.append(module)
        break

  return filtered_module_list


def find_breakpad_home():
  """Locate the Breakpad home directory.

  We try to locate the package in the Impala toolchain folder.
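  For example (illustrative values): with IMPALA_TOOLCHAIN_PACKAGES_HOME set to
  /opt/toolchain and IMPALA_BREAKPAD_VERSION set to 1.0, this returns
  /opt/toolchain/breakpad-1.0.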
""" toolchain_packages_home = os.environ.get('IMPALA_TOOLCHAIN_PACKAGES_HOME') if not toolchain_packages_home: logging.error("IMPALA_TOOLCHAIN_PACKAGES_HOME is not set") return None if not os.path.isdir(toolchain_packages_home): logging.error("Could not find toolchain packages directory") return None breakpad_version = os.environ.get('IMPALA_BREAKPAD_VERSION') if not breakpad_version: logging.error("Could not determine breakpad version from toolchain") return None breakpad_dir = '{0}/breakpad-{1}'.format(toolchain_packages_home, breakpad_version) if not os.path.isdir(breakpad_dir): logging.error("Could not find breakpad directory") return None return breakpad_dir def find_breakpad_binary(binary_name): """Locate the specified Breadpad binary""" breakpad_home = find_breakpad_home() if not breakpad_home: return None binary_path = os.path.join(breakpad_home, 'bin', binary_name) if not os.path.isfile(binary_path): logging.error("Could not find {0} executable at {1}".format(binary_name, binary_path)) return None return binary_path def find_objcopy_binary(): """Locate the 'objcopy' binary from Binutils. We try to locate the package in the Impala toolchain folder. TODO: Fall back to finding objcopy in the system path. """ toolchain_packages_home = os.environ.get('IMPALA_TOOLCHAIN_PACKAGES_HOME') if not toolchain_packages_home: logging.error("IMPALA_TOOLCHAIN_PACKAGES_HOME is not set") return None if not os.path.isdir(toolchain_packages_home): logging.error("Could not find toolchain packages directory") return None binutils_version = os.environ.get('IMPALA_BINUTILS_VERSION') if not binutils_version: logging.error("Could not determine binutils version from toolchain") return None binutils_dir = "binutils-{0}".format(binutils_version) objcopy = os.path.join(toolchain_packages_home, binutils_dir, 'bin', 'objcopy') if not os.path.isfile(objcopy): logging.error("Could not find objcopy executable at {0}".format(objcopy)) return None return objcopy def ensure_dir_exists(path): """Make sure the directory 'path' exists in a thread-safe way.""" try: os.makedirs(path) except OSError as e: if e.errno != errno.EEXIST or not os.path.isdir(path): raise e def dump_symbols_for_binary(dump_syms, objcopy, binary, out_dir): """Dump symbols of a single binary file and move the result. Symbols will be extracted to a temporary file and moved into place afterwards. Required directories will be created if necessary. """ logging.info("Processing binary file: {0}".format(binary)) ensure_dir_exists(out_dir) # tmp_fd will be closed when the file object created by os.fdopen() below gets # destroyed. tmp_fd, tmp_file = tempfile.mkstemp(dir=out_dir, suffix='.sym') try: # Create a temporary directory used for decompressing debug info tempdir = tempfile.mkdtemp() # Binaries can contain compressed debug symbols. Breakpad currently # does not support dumping symbols for binaries with compressed debug # symbols. # # As a workaround, this uses objcopy to create a copy of the binary with # the debug symbols decompressed. If the debug symbols are not compressed # in the original binary, objcopy simply makes a copy of the binary. # Breakpad is able to read symbols from the decompressed binary, and # those symbols work correctly in resolving a minidump from the original # compressed binary. # TODO: In theory, this could work with the binary.debug_path. 
    binary_basename = os.path.basename(binary)
    decompressed_binary = os.path.join(tempdir, binary_basename)
    objcopy_retcode = subprocess.call([objcopy, "--decompress-debug-sections",
                                       binary, decompressed_binary])

    # Run dump_syms on the decompressed binary. If objcopy failed for some reason,
    # fall back to running dump_syms directly on the original binary. This is unlikely
    # to work, but it is a way of guaranteeing that objcopy is not the problem.
    args = [dump_syms, decompressed_binary]
    if objcopy_retcode != 0:
      sys.stderr.write('objcopy failed. Trying to run dump_syms directly.\n')
      args = [dump_syms, binary]

    # Run dump_syms on the binary.
    proc = subprocess.Popen(args, stdout=os.fdopen(tmp_fd, 'wb'), stderr=subprocess.PIPE)
    _, stderr = proc.communicate()
    if proc.returncode != 0:
      sys.stderr.write('Failed to dump symbols from %s, return code %s\n' %
                       (binary, proc.returncode))
      sys.stderr.write(stderr.decode('utf-8'))
      os.remove(tmp_file)
      return False

    # Parse the temporary file to determine the full target path.
    with open(tmp_file, 'r') as f:
      header = f.readline().strip()
      # Format of the header is: MODULE os arch binary_id binary
      _, _, _, binary_id, binary = header.split(' ')

    out_path = os.path.join(out_dir, binary, binary_id)
    ensure_dir_exists(out_path)
    # Move the temporary file to its final destination.
    shutil.move(tmp_file, os.path.join(out_path, '%s.sym' % binary))
  except Exception as e:
    # Only need to clean up in case of errors.
    try:
      os.remove(tmp_file)
    except EnvironmentError:
      pass
    raise e
  finally:
    # Clean up the temporary directory.
    shutil.rmtree(tempdir)
  return True


def dump_symbols_for_all_modules(dump_syms, objcopy, module_list, out_dir):
  """Given a list of modules (ModuleInfo objects), dump symbols for each library
  listed.
  """
  for module in module_list:
    success = dump_symbols_for_binary(dump_syms, objcopy, module.code_file, out_dir)
    if not success:
      logging.warning("Failed to dump symbols for {0}".format(module.code_file))


def resolve_minidump(minidump_stackwalk, minidump_path, symbol_dir, verbose, out_file):
  """Run minidump_stackwalk on the minidump with the given symbol directory, writing
  the resolved output to 'out_file'.
  """
  minidump_stackwalk_cmd = [minidump_stackwalk, minidump_path, symbol_dir]

  # There are circumstances where minidump_stackwalk can go wrong and become a runaway
  # process capable of using all system memory. If the prlimit utility is present, we
  # use it to apply a limit on memory consumption.
  #
  # See if we have the prlimit utility. If it is not installed at all, subprocess.run()
  # raises OSError; treat that the same as prlimit being unusable.
  try:
    check_prlimit = subprocess.run(["prlimit", "-V"], stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
    have_prlimit = check_prlimit.returncode == 0
  except OSError:
    have_prlimit = False

  if have_prlimit:
    # The prlimit utility is available, so wrap the minidump_stackwalk command to apply
    # a 4GB limit on virtual memory. In normal operations, 4GB is plenty.
    prlimit_wrapper = ["prlimit", "--as={0}".format(4 * 1024 * 1024 * 1024)]
    minidump_stackwalk_cmd = prlimit_wrapper + minidump_stackwalk_cmd

  with open(out_file, "w") as out_f:
    stderr_output = None if verbose else subprocess.DEVNULL
    subprocess.run(minidump_stackwalk_cmd, stdout=out_f, stderr=stderr_output,
                   check=True)


def raw_dump_for_minidump(minidump_dump, minidump_path):
  """Run minidump_dump on the specified minidump and split the output into lines."""
  # minidump_dump sometimes returns an error code even though it produced usable
  # output. So, this doesn't check the error code, and it relies on read_module_info()
  # doing validation.
  #
  # Python 3.6 adjustments:
  #  - 'capture_output=True' is not supported: set stdout/stderr to subprocess.PIPE
  #    instead.
  #  - 'text=True' is not supported: set 'universal_newlines=True' (the two are the
  #    same thing).
  output = subprocess.run([minidump_dump, minidump_path], stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, universal_newlines=True)
  return output.stdout.split('\n')


def parse_args():
  """Parse command line arguments and perform sanity checks."""
  # TODO:
  #  - Add ability to specify the Breakpad home
  #  - Add ability to specify the symbol directory location (for reuse)
  #  - Add ability to specify the Binutils home
  parser = ArgumentParser()
  parser.add_argument('--minidump_file', required=True)
  parser.add_argument('--output_file', required=True)
  parser.add_argument('-v', '--verbose', action='store_true')
  parser.add_argument('--safe_library_list', default="libstdc++.so,libc.so,libjvm.so",
                      help="Comma-separated list of prefixes for allowed system "
                           "libraries")
  args = parser.parse_args()
  return args


def dump_syms_and_resolve_stack(modules, minidump_file, output_file, verbose):
  """Dump the symbols for the listed modules and use them to resolve the minidump."""
  # Create a temporary directory to store the symbols. It is cleaned up automatically.
  with tempfile.TemporaryDirectory() as tmp_dir:
    # Dump symbols for all the modules into this temporary directory.
    # This needs both dump_syms and objcopy.
    dump_syms_bin = find_breakpad_binary("dump_syms")
    if not dump_syms_bin:
      logging.error("Could not find Breakpad dump_syms binary")
      sys.exit(1)
    objcopy_bin = find_objcopy_binary()
    if not objcopy_bin:
      logging.error("Could not find Binutils objcopy binary")
      sys.exit(1)
    dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)

    # Resolve the minidump with the temporary symbol directory.
    minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
    if not minidump_stackwalk_bin:
      logging.error("Could not find Breakpad minidump_stackwalk binary")
      sys.exit(1)
    resolve_minidump(minidump_stackwalk_bin, minidump_file, tmp_dir, verbose,
                     output_file)


def main():
  args = parse_args()
  if args.verbose:
    logging.basicConfig(level=logging.INFO)
  else:
    logging.basicConfig(level=logging.WARNING)

  # Step 1: Get the raw dump for the specified minidump.
  minidump_dump_bin = find_breakpad_binary("minidump_dump")
  if not minidump_dump_bin:
    logging.error("Could not find Breakpad minidump_dump binary")
    sys.exit(1)
  contents = raw_dump_for_minidump(minidump_dump_bin, args.minidump_file)
  if not contents:
    logging.error(
        "minidump_dump could not get the contents of {0}".format(args.minidump_file))
    sys.exit(1)

  # Step 2: Parse the raw dump to get the list of code modules.
  # This is the list of things that have symbols we need to dump.
  modules = read_module_info(contents)
  if not modules:
    logging.error("Failed to read modules for {0}".format(args.minidump_file))
    sys.exit(1)

  # Step 3: Dump the symbols and use them to resolve the minidump.
  # Sometimes there are libraries with corrupt/problematic symbols that can cause
  # minidump_stackwalk to go haywire and use excessive memory. First, we try using
  # symbols from all of the shared libraries. If that fails, we fall back to using a
  # "safe" list of shared libraries.
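  # With the default --safe_library_list, the fallback keeps symbols only for the main
  # binary plus shared libraries whose names start with libstdc++.so, libc.so, or
  # libjvm.so (see filter_shared_library_modules).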
  try:
    # Dump the symbols and use them to resolve the minidump.
    dump_syms_and_resolve_stack(modules, args.minidump_file, args.output_file,
                                args.verbose)
    return
  except Exception:
    logging.warning("Encountered error: {0}".format(traceback.format_exc()))
    logging.warning("Falling back to resolution using the safe library list")
    logging.warning("Safe library list: {0}".format(args.safe_library_list))

  # Limit the shared libraries to the "safe" list of shared libraries and try again.
  if len(args.safe_library_list) == 0:
    safe_library_list = []
  else:
    safe_library_list = args.safe_library_list.split(",")
  safe_modules = filter_shared_library_modules(modules, safe_library_list)
  dump_syms_and_resolve_stack(safe_modules, args.minidump_file, args.output_file,
                              args.verbose)


if __name__ == "__main__":
  main()