Skip to content

Commit

Permalink
Merge pull request #547 from tweag/python-linking-debugging-tools
Browse files Browse the repository at this point in the history
debug: Add python linking debugging tools
  • Loading branch information
mboes committed Jan 18, 2019
2 parents cc100e9 + 7b503b4 commit febef11
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 0 deletions.
5 changes: 5 additions & 0 deletions debug/linking_utils/BUILD
@@ -0,0 +1,5 @@
# Helper library for debugging runtime linker failures; see README.md.
py_library(
    name = "linking_utils",
    srcs = ["ldd.py"],
    visibility = ["//visibility:public"],
)
51 changes: 51 additions & 0 deletions debug/linking_utils/README.md
@@ -0,0 +1,51 @@
# Debugging linking errors

The usual utilities like `nm`, `objdump` and of course `ldd` (see
[here](https://linux-audit.com/elf-binaries-on-linux-understanding-and-analysis/#tools-for-binary-analysis)
for a good overview of existing tools) go a long way, but when
debugging non-trivial runtime linker failures one would often like to
filter outputs programmatically with more than just simple `grep` and
`sed` expressions.

This library provides a small set of utility subroutines that can help
debug complicated linker errors.

The main function is `ldd(f, elf_path)`, which is in the same spirit
as `ldd(1)`, but returns a tree of shared dependencies instead of a
flat list. Additionally, it expects a function `f` which is applied to
each recursion level of dependencies.

Functions that can be passed to `ldd`:

- `identity`: pass through every info `ldd` can output
- `remove_uninteresting_dependencies`: remove entries that are mostly noise
- `was_runpath_used`: return a list of unused runpaths

Helpers:
- `dict_remove_empty`: remove fields with empty lists/dicts from an output

Example usage:

```python
import pprint
from bazel_tools.tools.python.runfiles import runfiles
from debug.linking_utils.ldd import \
ldd, \
was_runpath_used, \
dict_remove_empty, \
    remove_uninteresting_dependencies

r = runfiles.Create()
pp = pprint.PrettyPrinter(indent=2)

pp.pprint(
ldd(remove_uninteresting_dependencies, path)
)

print("\nUnused RUNPATH entries:")
pp.pprint(
dict_remove_empty(
ldd(was_runpath_used, path)['others']
)
)
```
201 changes: 201 additions & 0 deletions debug/linking_utils/ldd.py
@@ -0,0 +1,201 @@
import subprocess
import os


### helper functions

def list_to_dict(f, l):
    """Build a dict with the elements of ``l`` as keys and ``f(el)`` as values.

    Args:
        f: function applied to each element to produce its value
        l: iterable of hashable elements

    Returns:
        dict mapping each element of ``l`` to ``f(el)``
    """
    # Dict comprehension instead of a manual loop-and-assign.
    return {el: f(el) for el in l}

def dict_remove_empty(d):
    """Return a copy of ``d`` without keys whose value is ``[]`` or ``{}``.

    Other falsy values (``0``, ``""``, ``None``) are deliberately kept;
    only empty lists and empty dicts count as "empty" here.
    """
    # ``.items()`` (not the Python-2-only ``.iteritems()``) keeps this
    # working on both Python 2 and Python 3.
    return {k: v for k, v in d.items() if not (v == [] or v == {})}

def identity(x):
    """Return ``x`` unchanged — the identity function, usable as a pass-through for ldd."""
    return x

def const(x):
    """Return a one-argument function that ignores its argument and always returns ``x``."""
    return lambda _unused: x


### IO functions that find elf dependencies

def get_runpath_dirs(elf):
    """Find all DT_RUNPATH entries of the ELF file at ``elf``.

    ``$ORIGIN`` in an entry is replaced by the directory containing
    ``elf``, mimicking the dynamic linker's expansion.

    Returns:
      list of
      { path: unmodified string from DT_RUNPATH
      , absolute_path: fully normalized, absolute path to dir }
      ([] when the binary has no RUNPATH)
    """
    origin = os.path.dirname(elf)
    # TODO: way to get info with less execution overhead
    # TODO: cache the results to prevent more than one call per elf binary
    # NOTE(review): ``elf`` is interpolated into a shell pipeline, so a path
    # containing spaces or shell metacharacters would break; presumably safe
    # for Bazel-generated paths — confirm.
    res = subprocess.check_output("""objdump -x {} | grep RUNPATH | sed 's/^ *RUNPATH *//'""".format(elf), shell = True).strip()
    # RUNPATH is a colon-separated list; the ``path != ""`` guard drops
    # empty segments, so a missing RUNPATH (empty output) yields [].
    return [{ 'path': path,
              'absolute_path': os.path.normpath(path.replace("$ORIGIN", origin)) }
            for path in res.strip(":").split(":")
            if path != ""]

def get_needed(elf):
    """Return the list of DT_NEEDED entries (direct shared-library
    dependencies) of the ELF file at ``elf``.

    Returns ``[]`` when the binary declares no NEEDED entries.
    (Previously an empty objdump output produced the bogus list ``[""]``,
    because ``"".split("\\n")`` is ``[""]``.)
    """
    # TODO: way to get info with less execution overhead
    # TODO: cache the results to prevent more than one call per elf binary
    # NOTE(review): ``elf`` is interpolated into a shell pipeline, so a path
    # containing spaces or shell metacharacters would break; presumably safe
    # for Bazel-generated paths — confirm.
    res = subprocess.check_output("""objdump -x {} | grep NEEDED | sed 's/^ *NEEDED *//'""".format(elf), shell = True).strip()
    # Drop empty lines so that no output means no dependencies.
    return [line for line in res.strip("\n").split("\n") if line != ""]


### Main utility

# Sentinel values stored in the `needed` dict of an ldd() result instead of
# a {item, found_in} record when a dependency could not be resolved.
# cannot find dependency
LDD_MISSING = "MISSING"
# don't know how to search for dependency
LDD_UNKNOWN = "DUNNO"
# both error sentinels, for membership tests like `v in LDD_ERRORS`
LDD_ERRORS = [ LDD_MISSING, LDD_UNKNOWN ]

def ldd(f, elf_path):
    """Follow the DT_NEEDED ELF headers of ``elf_path``, searching through DT_RUNPATH.

    Like ldd(1), but builds a tree of shared dependencies instead of a flat
    list, and applies ``f`` to the result of each recursion level.

    DependencyInfo :
      { needed : dict(string, union(
          LDD_MISSING, LDD_UNKNOWN,
          {
            # the needed dependency
            item : a,
            # where the dependency was found in
            found_in : RunpathDir
          }))
      # all runpath directories that were searched
      , runpath_dirs : [ RunpathDir ] }
    Args:
      f: DependencyInfo -> a
         modifies the results of each level
      elf_path: path to ELF file
    Returns: a
    """
    def search(rdirs, elf_libname):
        """search for elf_libname in rdirs and return either name or missing"""
        res = LDD_MISSING
        for rdir in rdirs:
            potential_path = os.path.join(rdir['absolute_path'], elf_libname)
            # first match wins, mirroring the dynamic linker's
            # left-to-right RUNPATH search order
            if os.path.exists(potential_path):
                res = {
                    'item': potential_path,
                    'found_in': rdir,
                }
                break
        return res

    def recurse(search_res):
        # a missing dependency has no path to descend into
        if search_res == LDD_MISSING:
            return LDD_MISSING
        else:
            # we keep all other fields the same,
            # just item is the one that does the recursion.
            # This is the part that would normally be done by fmap.
            # NOTE: mutates search_res in place.
            search_res['item'] = ldd(f, search_res['item'])
            return search_res

    rdirs = get_runpath_dirs(elf_path)
    # if there's no runpath dirs we don't know where to search
    if rdirs == []:
        needed = list_to_dict(const(LDD_UNKNOWN), get_needed(elf_path))
    else:
        needed = list_to_dict(
            lambda name: recurse(search(rdirs, name)),
            get_needed(elf_path)
        )

    result = {
        'runpath_dirs': rdirs,
        'needed': needed
    }
    # f post-processes this level; children were already transformed by the
    # recursive ldd call inside recurse, so the tree is folded bottom-up.
    return f(result)


### Functions to pass to ldd

def remove_uninteresting_dependencies(d):
    """Filter for ldd() that removes noisy entries.

    Drops a few ubiquitous .sos (libc & friends) from `needed` and every
    RUNPATH entry pointing into the nix store, then strips empty fields.
    Can be abstracted later.

    Returns: { 'runp': [absolute runpath strings], 'need': dict of deps }
    """
    def bad_needed_p(k):
        "predicate for unneeded .sos"
        names = [
            'libc.so.6',
            'ld-linux-x86-64.so.2',
            'libgmp.so.10',
            'libm.so.6',
        ]
        return (k in names)

    def bad_runpath_p(p):
        "predicate for unneeded paths"
        prefixes = [
            "/nix/store/"
        ]
        return any(p.startswith(pref) for pref in prefixes)

    runpaths = []
    # `rdir`, not `dir`, to avoid shadowing the builtin.
    for rdir in d['runpath_dirs']:
        absp = rdir['absolute_path']

        # TODO: put in different test, this is interesting info!
        # non-existing RUNPATHs
        if not os.path.exists(absp):
            print("ATTN path doesnt exist: {}".format(absp))

        if not bad_runpath_p(absp):
            runpaths.append(absp)

    needed = {}
    # .items() instead of the Python-2-only .iteritems().
    for k, v in d['needed'].items():
        # filter out some uninteresting deps
        if not bad_needed_p(k):
            # keep error sentinels as-is; otherwise unwrap to the item
            needed[k] = v['item'] if v not in LDD_ERRORS else v

    return dict_remove_empty({
        'runp': runpaths,
        'need': needed,
    })


def was_runpath_used(d):
    """Filter for ldd() that reports unused RUNPATH entries.

    Returns a dict of two fields: `mine` is the list of this level's
    RUNPATH entries in which no needed dependency was found (nix store
    paths excluded); `others` is a flat dict mapping each transitive
    dependency's name to its own list of unused entries.
    """
    used = set()
    given = set(r['absolute_path'] for r in d['runpath_dirs'])
    prev = {}
    # .items() instead of the Python-2-only .iteritems().
    for k, v in d['needed'].items():
        if v not in LDD_ERRORS:
            used.add(v['found_in']['absolute_path'])
            prev[k] = v['item']
    unused = [
        u for u in given.difference(used)
        # leave out nix storepaths
        if not u.startswith("/nix/store")
    ]

    # Each layer doesn't know about their own name
    # So we return a list of unused for this layer ('mine')
    # and a dict of all previous layers combined (name to list)
    def combine_unused(deps):
        res = {}
        for name, dep in deps.items():
            res.update(dep['others'])
            res[name] = dep['mine']
        return res

    return {
        'mine': unused,
        'others': combine_unused(prev),
    }
3 changes: 3 additions & 0 deletions tools/README.md
@@ -0,0 +1,3 @@
Note: `py_library`s cannot be put into this folder, lest they produce
a namespace collision with `@bazel_tools//tools`.
See https://github.com/bazelbuild/bazel/issues/7051

0 comments on commit febef11

Please sign in to comment.