Skip to content

Commit

Permalink
Fix bug where generation skips the correct file of the right arch
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jan 22, 2022
1 parent 92db09c commit f5fb1a9
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 72 deletions.
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,18 @@ $ elfcall gen data/libfoo.so --fmt text
/usr/lib/x86_64-linux-gnu/libstdc++.so.6 LINKSWITH libc.so.6
/usr/lib/x86_64-linux-gnu/libstdc++.so.6 LINKSWITH ld-linux-x86-64.so.2
/usr/lib/x86_64-linux-gnu/libstdc++.so.6 LINKSWITH libgcc_s.so.1
/lib/x86_64-linux-gnu/libc.so.6 LINKSWITH ld-linux-x86-64.so.2
/usr/lib/x86_64-linux-gnu/libstdc++.so.6 EXPORTS _ZNSt8ios_base4InitC1Ev
/usr/lib/x86_64-linux-gnu/libstdc++.so.6 EXPORTS _ZNSt8ios_base4InitD1Ev
/lib/x86_64-linux-gnu/libc.so.6 EXPORTS __cxa_finalize
/lib/x86_64-linux-gnu/libc.so.6 EXPORTS __cxa_atexit
```

#### Cypher

```bash
$ elfcall gen data/libfoo.so --fmt cypher
```

Note that this is under development; eventually we will support different graph generation
options (right now the output is printed to the screen).
Expand All @@ -108,16 +116,20 @@ You can also generate a tree of the library paths parsed:
$ elfcall tree data/libfoo.so
libstdc++.so.6 [x86_64-linux-gnu.conf]
ld-linux-x86-64.so.2 [x86_64-linux-gnu.conf]
libm.so.6 [x86_64-linux-gnu.conf]
libgcc_s.so.1 [x86_64-linux-gnu.conf]
libc.so.6 [x86_64-linux-gnu.conf]
```

or:

```bash
$ elfcall tree /usr/bin/vim
libm.so.6 [x86_64-linux-gnu.conf]
ld-linux-x86-64.so.2 [x86_64-linux-gnu.conf]
libtinfo.so.6 [x86_64-linux-gnu.conf]
libselinux.so.1 [x86_64-linux-gnu.conf]
libpcre2-8.so.0 [x86_64-linux-gnu.conf]
ld-linux-x86-64.so.2 [x86_64-linux-gnu.conf]
libcanberra.so.0 [x86_64-linux-gnu.conf]
libvorbisfile.so.3 [x86_64-linux-gnu.conf]
libvorbis.so.0 [x86_64-linux-gnu.conf]
Expand All @@ -126,11 +138,15 @@ libcanberra.so.0 [x86_64-linux-gnu.conf]
libltdl.so.7 [x86_64-linux-gnu.conf]
libacl.so.1 [x86_64-linux-gnu.conf]
libgpm.so.2 [x86_64-linux-gnu.conf]
libdl.so.2 [x86_64-linux-gnu.conf]
libpython3.8.so.1.0 [x86_64-linux-gnu.conf]
libexpat.so.1 [x86_64-linux-gnu.conf]
libz.so.1 [x86_64-linux-gnu.conf]
libutil.so.1 [x86_64-linux-gnu.conf]
libpthread.so.0 [x86_64-linux-gnu.conf]
libc.so.6 [x86_64-linux-gnu.conf]
```


## TODO

- test each of graph generations, add to client
Expand Down
2 changes: 1 addition & 1 deletion elfcall/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_parser():
"--fmt",
"-f",
help="graph format to generate",
choices=["text", "ge", "gexf", "console"],
choices=["text", "ge", "gexf", "console", "cypher"],
default="console",
)

Expand Down
35 changes: 19 additions & 16 deletions elfcall/main/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def tree(self, binary=None):
self.ld.parse()

# Load original binary - we need to match elf attributes here
original = elf.ElfFile(binary)
original = elf.ElfFile(os.path.realpath(binary), binary)
results = self.recursive_find(binary, original=original)
self.library_tree(results)

Expand All @@ -78,7 +78,7 @@ def recursive_find(
recursively find needed paths, keep track of hierarchy
"""
# See parse_binary for notes
e = elf.ElfFile(lib)
e = elf.ElfFile(os.path.realpath(lib), lib)

# Keep track of libraries we've seen so we don't loop
if not seen:
Expand Down Expand Up @@ -110,11 +110,10 @@ def recursive_find(
continue
seen.add(path)

libelf, src, already_seen = self.find_library(path, search_paths)

# If it does not match the arch and elf type, ignore
if not original.matches(libelf):
continue
# Also pass in original to do matching
libelf, src, already_seen = self.find_library(
path, search_paths, original
)

# We might instead get back a soname that we've already seen
if already_seen:
Expand Down Expand Up @@ -185,12 +184,16 @@ def gen(self, binary=None, fmt=None):
locations = self.parse_binary(binary)

# Select output format (default to console)
# TODO add command line flag
if fmt == "text":
out = graph.Text(locations)
elif fmt == "gv":
out = graph.Gv(locations)
elif fmt == "cypher":
out = graph.Cypher(locations)
elif fmt == "gexf":
out = graph.Gexf(locations)
else:
out = graph.Console(locations)

out.generate()

def get_search_paths(self, e):
Expand Down Expand Up @@ -227,7 +230,7 @@ def parse_binary(self, binary):
# https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html# see dynamic-section
# We first look at the symbol table of the executable program to find undefined symbols
# This should fail if not an ELF because we cannot continue
e = elf.ElfFile(binary)
e = elf.ElfFile(os.path.realpath(binary), binary)

# Keep track of imported, found imported, and exported
# imported should be empty at the end
Expand Down Expand Up @@ -259,11 +262,7 @@ def parse_binary(self, binary):
seen.add(path)

# This will return loaded ELF, if found, otherwise None
libelf, _, already_seen = self.find_library(path, search_paths)

# If it does not match the arch and elf type, ignore
if not e.matches(libelf):
continue
libelf, _, already_seen = self.find_library(path, search_paths, e)

# We might instead get back a soname that we've already seen
if already_seen:
Expand Down Expand Up @@ -302,7 +301,7 @@ def parse_binary(self, binary):
break
return found

def find_library(self, name, paths):
def find_library(self, name, paths, match_to=None):
"""
Given a listing of paths, look for a library by name
"""
Expand Down Expand Up @@ -341,6 +340,10 @@ def find_library(self, name, paths):
logger.warning("Cannot load %s" % files[name])
continue

# If it does not match the arch and elf type, ignore
if match_to and not match_to.matches(libelf):
continue

# Here we save based on soname, if defined
if libelf.soname:
self.library_cache[libelf.soname] = libelf
Expand Down
6 changes: 4 additions & 2 deletions elfcall/main/graph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ def get_exported(self):
# meta here (lib) is the one exporting e.g., lib -> export -> filename
for meta in metas:
symbol = meta["name"]
size = meta["size"]
definition = meta["def"]
typ = meta["type"]
bind = meta["bind"]
exported.add((symbol, typ, bind))
Expand All @@ -57,6 +55,10 @@ def parse(self):
self.uids[meta["lib"]["fullpath"]] = self.generate_placeholder()
self.linked_libs[meta["lib"]["fullpath"]] = meta["linked_libs"]

for filename, linked_libs in self.linked_libs.items():
for linked_lib in linked_libs:
self.uids[linked_lib] = self.generate_placeholder()

def generate_placeholder(self):
"""
Generate a unique placeholder name for a node.
Expand Down
107 changes: 56 additions & 51 deletions elfcall/main/graph/cypher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import elfcall.utils as utils
from elfcall.logger import logger

import os
import secrets
import string
import sys
Expand All @@ -14,65 +16,68 @@

class Cypher(GraphBase):
def generate(self):
logger.info("Output will be written to %s" % self.outfile)
with open(self.outfile, "w") as fd:
fd.write("CREATE ")
newline = ", \n"
seenfirst = False
for filename, symbols in self.organized.items():
if self.outfile == sys.stdout:
fd = sys.stdout
else:
logger.info("Output will be written to %s" % self.outfile)
fd = open(self.outfile, "w")

fd.write("CREATE ")
newline = ", \n"
seenfirst = False
for filename, symbols in self.organized.items():
if seenfirst:
fd.write(newline)
else:
seenfirst = True
fd.write(
"(%s:ELF {name: '%s', label: '%s'})"
% (self.uids[filename], filename, os.path.basename(filename))
)

seenfirst = False
# Record linked dependencies
for filename, symbols in self.organized.items():
for linked_lib in self.linked_libs[filename]:
if seenfirst:
fd.write(newline)
else:
seenfirst = True
fd.write(
"(%s:ELF {name: '%s', label: '%s'})"
% (
self.uids[filename],
filename,
os.path.basename(filename),
)
)

seenfirst = False
# Record linked dependencies
for filename, symbols in self.organized.items():
for linked_lib in self.linked_libs[filename]:
if seenfirst:
fd.write(newline)
else:
seenfirst = True
fd.write(
"(%s)-[:LINKSWITH]->(%s)"
% (
self.uids[filename],
self.uids[linked_lib],
)
)

exported = self.get_exported()

# Create a placeholder for each
for symbol in exported:
placeholder = self.generate_placeholder()
self.symbol_uids[symbol[0]] = placeholder
fd.write("\n")
fd.write(
"(%s:SYMBOL {name: '%s', type: '%s'})"
"(%s)-[:LINKSWITH]->(%s)"
% (
placeholder,
symbol[0],
symbol[1],
self.uids[filename],
self.uids[linked_lib],
)
)

fd.write(newline)
fd.write("(%s)-[:EXPORTS]->(%s)" % (self.uids[filename], placeholder))
exported = self.get_exported()

# Create a placeholder for each
for symbol in exported:
placeholder = self.generate_placeholder()
self.symbol_uids[symbol[0]] = placeholder
fd.write("\n")
fd.write(
"(%s:SYMBOL {name: '%s', type: '%s'})"
% (
placeholder,
symbol[0],
symbol[1],
)
)

fd.write(newline)
fd.write("(%s)-[:EXPORTS]->(%s)" % (self.uids[filename], placeholder))

# store which files use which symbols
for filename, metas in self.organized.items():
for meta in metas:
symbol = meta["name"]
placeholder = self.symbol_uids[symbol]
fd.write("\n(%s)-[:USES]->(%s)" % (self.uids[filename], placeholder))

# store which files use which symbols
for filename, metas in self.organized.items():
for meta in metas:
symbol = meta["name"]
placeholder = self.symbol_uids[symbol]
fd.write("(%s)-[:USES]->(%s)" % (self.uids[filename], placeholder))
fd.write(";\n")

fd.write(";\n")
if self.outfile != sys.stdout:
fd.close()

0 comments on commit f5fb1a9

Please sign in to comment.