Skip to content

Commit

Permalink
adding MOOOOAR examples (#4)
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jun 10, 2022
1 parent 96a3fcb commit 7d7ac38
Show file tree
Hide file tree
Showing 38 changed files with 4,392 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
build
__pycache__
dist
example
MANIFEST

ccle/armel
Expand Down
11 changes: 1 addition & 10 deletions README-SMEAGLE.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,7 @@ tests.py /home/vanessa/Desktop/Code/cle/examples/callsite/lib.so

## TODO:
- Vanessasaurus:
- Examples to add:
- enum as a parameter
- fixed size arrays
- structures of structures
- structures of arrays of structures
- structures of arrays of arrays
- complex types (look at unit tests)
- const versions of types (also in unit tests)
- just review smeagle unit tests
- option to filter out some line programs based on location (to deal with unknown pointers)
- option to filter out some line programs based on location (to deal with unknown pointers) - NOTE: there seems to be a bug with new DWARF and DW_AT_stmt_list parsing
- Tim:
- Write out high level approach
- need to complement this with C++ interface to get callsites into Python. Only need for callsites for now, unless speed is an issue in the future.
Expand Down
21 changes: 16 additions & 5 deletions cle/backends/elf/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,19 @@ def parse_subprogram(self, die):
functions, hence why we parse the subprogram children here to find
the rest.
"""
name = self.get_name(die)
if self.symbols and name not in self.symbols:
return

# DW_AT_external marks a subprogram that is visible outside of this CU; skip any without it
if "DW_AT_external" not in die.attributes:
return

# TODO see page 92 of https://dwarfstd.org/doc/DWARF4.pdf
# need to parse virtual functions and other attributes
entry = {"name": self.get_name(die)}
entry = {"name": name}
if name in self.symbols:
entry["direction"] = self.symbols[name]

# Set the allocator on the level of the function
allocator = None
Expand All @@ -185,6 +191,7 @@ def parse_subprogram(self, die):
# Hold previous child for modifiers
param = None
for child in die.iter_children():

# can either be inlined subroutine or format parameter
if child.tag == "DW_TAG_formal_parameter":
param = {"size": self.get_size(child)}
Expand Down Expand Up @@ -486,10 +493,10 @@ def parse_enumeration_type(self, die):
entry = {
"name": self.get_name(die),
"size": self.get_size(die),
"class": "Scalar",
}
underlying_type = self.parse_underlying_type(die)
entry.update(underlying_type)
entry["class"] = "Enum"

fields = []
for child in die.iter_children():
Expand Down Expand Up @@ -565,14 +572,14 @@ def parse_sibling(self, die):
return self.parse_underlying_type(sibling)

@cache_type
def parse_underlying_type(self, die, indirections=0):
def parse_underlying_type(self, die, indirections=0, entry=None):
"""
Given a type, parse down to the underlying type (and count pointer indirections)
"""
if die in self.underlying_types:
return self.underlying_types[die]

entry = {}
entry = entry or {}
if "DW_AT_type" not in die.attributes:
return entry

Expand Down Expand Up @@ -607,6 +614,9 @@ def parse_underlying_type(self, die, indirections=0):
if type_die and type_die.tag == "DW_TAG_union_type":
return self.parse_union_type(type_die)

if type_die and type_die.tag == "DW_TAG_enumeration_type":
return self.parse_enumeration_type(type_die)

# Case 1: It's an array (and type is for elements)
if type_die and type_die.tag == "DW_TAG_array_type":
entry = self.parse_array_type(type_die)
Expand All @@ -615,9 +625,10 @@ def parse_underlying_type(self, die, indirections=0):
{
"name": self.get_name(die),
"class": "Array",
"type": array_type["type"],
}
)
if "type" in array_type:
entry["type"] = array_type["type"]
return entry

# Struct
Expand Down
34 changes: 29 additions & 5 deletions cle/backends/elf/elf.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,10 @@ def __init__(
self.has_dwarf_info = False

if dwarf:
# Prepare exported symbols
dynamic_symbols = self._load_dynamic_symbols()
# Prepare a corpus to populate
self.corpus = ElfCorpus(self.binary, arch=self.arch)
self.corpus = ElfCorpus(self.binary, arch=self.arch, symbols=dynamic_symbols)
# Load DIEs
self._load_dies(dwarf)
# Load function hints and exception handling artifacts
Expand Down Expand Up @@ -220,7 +222,6 @@ def __init__(
for offset, patch in patch_undo:
self.memory.store(AT.from_lva(self.min_addr + offset, self).to_rva(), patch)


#
# Properties and Public Methods
#
Expand Down Expand Up @@ -350,6 +351,23 @@ def finalizers(self):
def symbols_by_name(self):
return self._symbols_by_name.copy()

def _load_dynamic_symbols(self):
    """
    Build a lookup of global dynamic symbols and their direction.

    Walks every section of ``self._reader`` named ".dynsym" and records,
    for each symbol bound as "STB_GLOBAL" (and not typed "STT_DELETED"),
    whether it is an "import" (section index is 'SHN_UNDEF') or an
    "export" (anything else).

    Returns a dict of symbol name -> "import" | "export". If a name occurs
    more than once, the last occurrence wins.
    """
    directions = {}
    for sec in self._reader.iter_sections():
        # Only the dynamic symbol table is of interest
        if sec.name != ".dynsym":
            continue

        for sym in sec.iter_symbols():
            info = sym.entry['st_info']

            # Skip anything that is not a global binding
            if info['bind'] != "STB_GLOBAL":
                continue
            # NOTE(review): confirm the reader ever reports this type string
            if info['type'] == "STT_DELETED":
                continue

            # An undefined section index means the symbol is provided
            # elsewhere (import); everything else is defined here (export)
            is_undefined = sym.entry['st_shndx'] == 'SHN_UNDEF'
            directions[sym.name] = "import" if is_undefined else "export"
    return directions

def get_symbol(self, symid, symbol_table=None): # pylint: disable=arguments-differ
"""
Gets a Symbol object for the specified symbol.
Expand Down Expand Up @@ -600,16 +618,22 @@ def _load_line_info(self, dwarf):
for cu in dwarf.iter_CUs():
comp_dir = '.'
die = cu.get_top_DIE()

if 'DW_AT_comp_dir' in die.attributes:
comp_dir = die.attributes['DW_AT_comp_dir'].value.decode()

# NOTE there seems to be a bug when it's parsed with this,
# although it still returns None
if "DW_AT_stmt_list" in die.attributes:
del die.attributes["DW_AT_stmt_list"]

# Added because this fails sometimes, along with lineprog.get_entries()
try:
lineprog = dwarf.line_program_for_CU(cu)
if lineprog is None:
continue
entries = lineprog.get_entries()
except:
continue
if lineprog is None:
except Exception as e:
continue
file_cache = {}
for line in entries:
Expand Down
2 changes: 2 additions & 0 deletions cle/backends/elf/lsda.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def _parse_lsda_header(self):
base_encoding = lpstart_encoding & 0x0f
modifier = lpstart_encoding & 0xf0

import IPython
IPython.embed()
lpstart = struct_parse(
Struct('dummy',
self._formats[base_encoding]('LPStart')),
Expand Down
2 changes: 1 addition & 1 deletion cle/backends/elf/parser/AMD64/allocators.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def get_register_string(self, lo, hi, param) -> str:
or lo == RegisterClass.COMPLEX_X87
or hi == RegisterClass.X87UP
):
return fallocator.next_framebase_from_type(param)
return self.fallocator.next_framebase_from_type(param)

# This should never be reached
raise RuntimeError("Unknown classification")
Expand Down
13 changes: 11 additions & 2 deletions cle/backends/elf/parser/AMD64/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,17 @@ def classify(typ, count=0, die=None, return_classification=False, allocator=None
if count > 0 or typ.get("class") == "Pointer":
cls = classify_pointer(count)

elif typ["class"] in ["Scalar", "Integer", "Integral", "Float", "Boolean"]:
elif typ["class"] in [
"Scalar",
"Integer",
"Integral",
"Float",
"ComplexFloat",
"Boolean",
]:
cls = classify_scalar(typ)
elif typ["class"] == "Enum":
cls = classify_enum(typ)
elif typ["class"] == "Struct":
cls = classify_struct(typ, allocator=allocator)
elif typ["class"] == "Union":
Expand Down Expand Up @@ -320,7 +329,7 @@ def classify_array(typ, allocator):


def classify_enum(typ):
return Classification("Enum", RegisterClass.INTEGER, RegisterClass.NO_CLASS)
return Classification("Enum", [RegisterClass.INTEGER, RegisterClass.NO_CLASS])


def classify_function(typ):
Expand Down
30 changes: 22 additions & 8 deletions cle/backends/elf/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class ClassType:
"int": "Integer",
"long int": "Integer",
"unsigned int": "Integer",
"unsigned char": "Integral",
"signed char": "Integral",
"short unsigned int": "Integer",
"long long int": "Integer",
Expand All @@ -22,20 +23,33 @@ class ClassType:
"long double": "Float",
}

patterns = {
"int": "Integer",
"char": "Integral",
"float": "Float",
"double": "Float",
}
@classmethod
def get(cls, typename):
"""
Given a class name, return the type
"""
if typename not in cls.types:
print("classtype")
print(typename)
import IPython
classname = None
for pattern in cls.patterns:
if pattern in typename:
classname = cls.patterns[pattern]
break

if not classname:
if typename not in cls.types:
print("classtype")
print(typename)
import IPython

IPython.embed()
name = cls.types[typename]
IPython.embed()
classname = cls.types[typename]

# Prefix with complex
if "complex" in typename.lower():
return "Complex%s" % name
return name
return "Complex%s" % classname
return classname
6 changes: 6 additions & 0 deletions dwarfdump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Dump the DWARF debug information of one example binary by running
# dwarfdump inside a gcc:12.1 container, with the current directory
# mounted at /code.
#
# usage ./dwarfdump.sh fixed-sized-array/lib.so
# The path is relative to examples

if [ "$#" -ne 1 ]; then
    echo "usage: $0 <path relative to examples/>" >&2
    exit 1
fi

# The path is handed to the inner shell as a positional parameter instead of
# being spliced into the command string, so spaces or shell metacharacters in
# it (or in $PWD) cannot break or alter the command.
docker run -it -v "$PWD":/code gcc:12.1 bash -c \
    'apt-get update && apt-get install -y dwarfdump && dwarfdump "/code/examples/$1"' \
    _ "$1"
2 changes: 2 additions & 0 deletions examples/allocation/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the allocation example from example.cpp with debug info (-g).
# NOTE(review): -Wl,-E presumably forwards --export-dynamic to the linker so
# the executable's symbols appear in its dynamic symbol table - confirm.
all:
g++ -g -Wl,-E example.cpp -o example

0 comments on commit 7d7ac38

Please sign in to comment.