mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-12643 (part 2): Fallback to safe libraries on error in resolve_minidumps.py
Since resolve_minidumps.py's call to minidump_stackwalk can go haywire due to bad symbols in shared libraries, this adds a fallback mechanism where it tries again with a "safe" list of shared libraries. These are limited to the ones that make the most difference in resolving minidumps (libc, libstdc++, and libjvm). The list of safe libraries can be customized via the --safe_library_list option. Testing: - Verified that this uses the fallback on CentOS 7 and resolves the minidumps successfully. Change-Id: I6bb4c9f65f9c27bb3b86c7ff2f3a6a48e258ef01 Reviewed-on: http://gerrit.cloudera.org:8080/20863 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
@@ -43,6 +43,7 @@ import shutil
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import traceback
|
||||||
|
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
@@ -132,6 +133,26 @@ def read_module_info(minidump_dump_contents):
|
|||||||
return modules
|
return modules
|
||||||
|
|
||||||
|
|
||||||
|
def filter_shared_library_modules(module_list, lib_allow_list):
  """Filter the list of modules by eliminating any shared libraries that do not
  match one of the prefixes in the allow list. This keeps all non-shared libraries
  (such as the main binary).

  A module counts as a shared library when the basename of its 'code_file'
  contains ".so". Allow list entries are compared as prefixes of that basename.
  Entries that are empty or only whitespace are ignored, and surrounding
  whitespace is stripped from the rest.

  Returns a new list that preserves the order of 'module_list'.
  """
  # An empty prefix would match every basename (str.startswith("") is always
  # True) and silently disable the filter, e.g. when the list came from
  # splitting "libc.so," on commas. Drop such entries up front.
  stripped_entries = (entry.strip() for entry in lib_allow_list)
  allowed_prefixes = tuple(prefix for prefix in stripped_entries if prefix)

  filtered_module_list = []
  for module in module_list:
    code_file_basename = os.path.basename(module.code_file)
    # Keep anything that is not a shared library (e.g. the main binary)
    is_shared_library = ".so" in code_file_basename
    # Only keep shared libraries that match an entry on the allow list.
    # startswith() with an empty tuple returns False, so an empty allow list
    # removes every shared library.
    if not is_shared_library or code_file_basename.startswith(allowed_prefixes):
      filtered_module_list.append(module)
  return filtered_module_list
|
||||||
|
|
||||||
|
|
||||||
def find_breakpad_home():
|
def find_breakpad_home():
|
||||||
"""Locate the Breakpad home directory.
|
"""Locate the Breakpad home directory.
|
||||||
|
|
||||||
@@ -331,10 +352,39 @@ def parse_args():
|
|||||||
parser.add_argument('--minidump_file', required=True)
|
parser.add_argument('--minidump_file', required=True)
|
||||||
parser.add_argument('--output_file', required=True)
|
parser.add_argument('--output_file', required=True)
|
||||||
parser.add_argument('-v', '--verbose', action='store_true')
|
parser.add_argument('-v', '--verbose', action='store_true')
|
||||||
|
parser.add_argument('--safe_library_list',
|
||||||
|
default="libstdc++.so,libc.so,libjvm.so",
|
||||||
|
help="Comma-separate list of prefixes for allowed system libraries")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def dump_syms_and_resolve_stack(modules, minidump_file, output_file, verbose):
  """Dump the symbols for the listed modules and use them to resolve the minidump.

  'modules' is the list of modules whose symbols are dumped. 'minidump_file',
  'output_file' and 'verbose' are passed through to resolve_minidump().

  Logs an error and exits the process with status 1 if the Breakpad dump_syms
  binary, the Binutils objcopy binary, or the Breakpad minidump_stackwalk
  binary cannot be found.
  """
  # Locate every required tool before doing any work, so that a missing binary
  # is reported before symbols are dumped rather than after.
  # Need both dump_syms and objcopy to dump the symbols.
  dump_syms_bin = find_breakpad_binary("dump_syms")
  if not dump_syms_bin:
    logging.error("Could not find Breakpad dump_syms binary")
    sys.exit(1)
  objcopy_bin = find_objcopy_binary()
  if not objcopy_bin:
    logging.error("Could not find Binutils objcopy binary")
    sys.exit(1)
  minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
  if not minidump_stackwalk_bin:
    logging.error("Could not find Breakpad minidump_stackwalk binary")
    sys.exit(1)

  # Create a temporary directory to store the symbols
  # This automatically gets cleaned up
  with tempfile.TemporaryDirectory() as tmp_dir:
    # Dump symbols for all the modules into this temporary directory.
    dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)

    # Resolve the minidump with the temporary symbol directory, reusing the
    # binary path that was validated above instead of looking it up again.
    resolve_minidump(minidump_stackwalk_bin, minidump_file, tmp_dir, verbose,
                     output_file)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
@@ -361,28 +411,30 @@ def main():
|
|||||||
logging.error("Failed to read modules for {0}".format(args.minidump_file))
|
logging.error("Failed to read modules for {0}".format(args.minidump_file))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Create a temporary directory to store the symbols.
|
# Step 3: Dump the symbols and use them to resolve the minidump
|
||||||
# This automatically gets cleaned up.
|
# Sometimes there are libraries with corrupt/problematic symbols
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
# that can cause minidump_stackwalk to go haywire and use excessive
|
||||||
# Step 3: Dump symbols for all the modules into this temporary directory.
|
# memory. First, we try using symbols from all of the shared libraries.
|
||||||
# Need both dump_syms and objcopy
|
# If that fails, we fall back to using a "safe" list of shared libraries.
|
||||||
dump_syms_bin = find_breakpad_binary("dump_syms")
|
try:
|
||||||
if not dump_syms_bin:
|
# Dump the symbols and use them to resolve the minidump
|
||||||
logging.error("Could not find Breakpad dump_syms binary")
|
dump_syms_and_resolve_stack(modules, args.minidump_file, args.output_file,
|
||||||
sys.exit(1)
|
args.verbose)
|
||||||
objcopy_bin = find_objcopy_binary()
|
return
|
||||||
if not objcopy_bin:
|
except Exception:
|
||||||
logging.error("Could not find Binutils objcopy binary")
|
logging.warning("Encountered error: {0}".format(traceback.format_exc()))
|
||||||
sys.exit(1)
|
logging.warning("Falling back to resolution using the safe library list")
|
||||||
dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)
|
logging.warning("Safe library list: {0}".format(args.safe_library_list))
|
||||||
|
|
||||||
# Step 4: Resolve the minidump with the temporary symbol directory
|
# Limit the shared libraries to the "safe" list of shared libraries and
|
||||||
minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
|
# try again.
|
||||||
if not minidump_stackwalk_bin:
|
if len(args.safe_library_list) == 0:
|
||||||
logging.error("Could not find Breakpad minidump_stackwalk binary")
|
safe_library_list = []
|
||||||
sys.exit(1)
|
else:
|
||||||
resolve_minidump(find_breakpad_binary("minidump_stackwalk"), args.minidump_file,
|
safe_library_list = args.safe_library_list.split(",")
|
||||||
tmp_dir, args.verbose, args.output_file)
|
safe_modules = filter_shared_library_modules(modules, safe_library_list)
|
||||||
|
dump_syms_and_resolve_stack(safe_modules, args.minidump_file, args.output_file,
|
||||||
|
args.verbose)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user