Skip to content

Commit

Permalink
more pain
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jul 8, 2022
1 parent 92fd38b commit 9b2186f
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 55 deletions.
1 change: 1 addition & 0 deletions README-SMEAGLE.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ These are cases with gcc 12.1 dwarf and the library here that I can't resolve. I


## TODO:
- stopped at function-as-parameter (maybe done?)
- Add to tests:
- what happens if you pass an unnamed reference or pointer - does DWARF say it gets space or not?
- Test cases for different kinds of unions
Expand Down
52 changes: 34 additions & 18 deletions cle/backends/elf/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import cle.backends.elf.parser as abi_parser
from .location import get_register_from_expr, get_dwarf_from_expr
from .types import ClassType, types
from .types import ClassType
from .variable_type import VariableType
from ..corpus import Corpus
from .decorator import cache_type
Expand Down Expand Up @@ -38,18 +38,31 @@ def create_location_lookup(res):
return lookup


def add_direction(param, is_struct=False):
def add_direction(param, types, is_struct=False):
"""
Add direction to a normal parameter
"""
if param.get("class") == "Pointer":
is_pointer = param.get("class") == "Pointer"

# Look for pointers as far as we can go
if not is_pointer:
holder = param
while "type" in holder and len(holder["type"]) == 32:
holder = types[holder["type"]]
if holder.get("class") == "Pointer":
is_pointer = True
break
while "underlying_type" in holder:
holder = holder["underlying_type"]

if is_pointer:
param["direction"] = "both"
else:
param["direction"] = "import"
return param


def update_underlying_type(param, lookup):
def update_underlying_type(param, lookup, types):
"""
Given some kind of underlying type, match fields to locations.
"""
Expand All @@ -75,6 +88,7 @@ class ElfCorpus(Corpus):
"""

def __init__(self, *args, **kwargs):

self.loc_parser = None
self.arch = kwargs.get("arch")
self.parser = getattr(abi_parser, self.arch.name, None)
Expand All @@ -98,7 +112,7 @@ def add_locations(self):
for _, var in self.variables.items():
if "type" not in var:
continue
underlying_type = copy.deepcopy(types[var["type"]])
underlying_type = copy.deepcopy(self.types[var["type"]])
if underlying_type.get("class") == "Struct":
for field in underlying_type.get("fields", []):
field["direction"] = var.get("direction", "export")
Expand All @@ -122,7 +136,7 @@ def add_locations(self):
lookup = create_location_lookup(loc)
if func["return"]["type"] in lookup:
func["return"]["type"] = update_underlying_type(
func["return"], lookup
func["return"], lookup, types=self.types
)
# TODO what about function return type

Expand All @@ -137,13 +151,13 @@ def get_function_pointer(self, param, func, order):
return

# This might not be true, we check with underlying type
pointer_type = types[param["type"]]
pointer_type = self.types[param["type"]]
if "underlying_type" not in pointer_type:
return
underlying_type = pointer_type["underlying_type"].get("type")
if not underlying_type or underlying_type not in types:
if not underlying_type or underlying_type not in self.types:
return
underlying_type = types[underlying_type]
underlying_type = self.types[underlying_type]
if underlying_type.get("class") == "Function":
name = func.get("name", "unknown") + "_func_pointer_" + str(order)
if underlying_type.get("name") != "unknown":
Expand All @@ -157,6 +171,7 @@ def add_locations_func(self, func):
"""
allocator = self.parser.get_allocator()
for order, param in enumerate(func.get("parameters", [])):

res = self.parse_location(param, allocator)

# Check if param type is pointer -> function
Expand All @@ -168,6 +183,9 @@ def add_locations_func(self, func):
if not res:
continue

# Pointers go in both directions
param = add_direction(param, types=self.types)

# A non-aggregate
if isinstance(res, str):
param["location"] = res
Expand All @@ -177,10 +195,7 @@ def add_locations_func(self, func):

# Res is a classification with eightbytes that we unwrap
# Try just unwrapping the top level for now
param["type"] = update_underlying_type(param, lookup)

# Pointers go in both directions
param["type"] = add_direction(param["type"])
param["type"] = update_underlying_type(param, lookup, types=self.types)

def parse_location(self, entry, allocator):
"""
Expand All @@ -198,10 +213,12 @@ def parse_location(self, entry, allocator):
% self.arch.name
)

underlying_type = types.get(entry.get("type"))
underlying_type = self.types.get(entry.get("type"))
if not underlying_type:
return
return self.parser.classify(underlying_type, allocator=allocator)
return self.parser.classify(
underlying_type, allocator=allocator, types=self.types
)

def parse_variable(self, die, flags=None):
"""
Expand Down Expand Up @@ -786,8 +803,7 @@ def parse_array_type(self, die, parent=None, flags=None):
)

entry = self.add_flags(entry, flags)
if "type" in array_type:
entry["type"] = array_type["type"]
entry["underlying_type"] = array_type
return entry

def parse_enumeration_type(self, die, flags=None):
Expand Down Expand Up @@ -938,7 +954,7 @@ def parse_typedef(self, die, flags=None):

# Add the size to the typedef (shouldn't change)
while "size" not in ut and "type" in ut and len(ut["type"]) == 32:
ut = types[ut["type"]]
ut = self.types[ut["type"]]

entry["underlying_type"] = ut
entry["size"] = ut.get("size")
Expand Down
3 changes: 1 addition & 2 deletions cle/backends/elf/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
__license__ = "Apache-2.0"

from functools import partial, update_wrapper
from .types import types

import json
import hashlib
Expand Down Expand Up @@ -52,7 +51,7 @@ def __call__(self, cls, *args, **kwargs):
uid = self.hash(typ)

# Top level types holds the uid -> type
types[uid] = typ
cls.types[uid] = typ

# _types holds lookup of die offset to uid
cls._types[die.offset] = uid
Expand Down
2 changes: 0 additions & 2 deletions cle/backends/elf/elf.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,6 @@ def __init__(
if self.corpus:
# Post process to add locations
self.corpus.add_locations()
from .types import types
self.corpus.types = types

#
# Properties and Public Methods
Expand Down
57 changes: 29 additions & 28 deletions cle/backends/elf/parser/AMD64/classifier.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .register_class import RegisterClass
from .allocators import RegisterAllocator
from ...types import ClassType, types
from ...types import ClassType

import json
import hashlib
Expand Down Expand Up @@ -35,19 +35,6 @@ def do_print(self):
print("{%s,%s}" % (f.get("name"), f.get("size", 0)))


def update_types(typ):
"""
As we add register classes, we generate new types. Ideally
we can separate the types from the registers, but this is a quick
fix for now.
"""
global types
dumped = json.dumps(typ, sort_keys=True)
uid = hashlib.md5(dumped.encode("utf-8")).hexdigest()
types[uid] = typ
return types


def classify_pointer():
return Classification("Pointer", RegisterClass.INTEGER)

Expand All @@ -56,11 +43,12 @@ def classify_reference():
return Classification("Reference", RegisterClass.INTEGER)


def classify(typ, return_classification=False, allocator=None):
def classify(typ, return_classification=False, allocator=None, types=None):
"""
Main entrypoint to classify something - we return a location string (for non
aggregate types) OR an updated types that includes new locations for aggregates.
"""
types = types or {}

# Don't handle this case right now
if not typ or "class" not in typ or typ["class"] in ["Unknown", "ComplexUnknown"]:
Expand All @@ -71,7 +59,11 @@ def classify(typ, return_classification=False, allocator=None):

# TypeDefs without class get underlying type
if typ.get("class") == "TypeDef":
classname = typ["underlying_type"]["class"]
classtyp = typ
while "underlying_type" in classtyp:
classtyp = classtyp["underlying_type"]
if "class" in classtyp:
classname = classtyp["class"]

if classname == "Pointer":
cls = classify_pointer()
Expand All @@ -90,18 +82,18 @@ def classify(typ, return_classification=False, allocator=None):
elif classname == "Enum":
cls = classify_enum(typ)
elif classname == "Struct":
cls = classify_struct(typ, allocator=allocator)
cls = classify_struct(typ, allocator=allocator, types=types)
elif classname == "Union":
cls = classify_union(typ, allocator=allocator)
elif classname == "Array":
cls = classify_array(typ, allocator=allocator)
cls = classify_array(typ, allocator=allocator, types=types)

# If we don't know the underlying type
if not cls:
return

elif classname == "Class":
cls = classify_class(typ, allocator=allocator)
cls = classify_class(typ, allocator=allocator, types=types)
elif classname == "Function":

# Functions that aren't pointers
Expand Down Expand Up @@ -260,19 +252,27 @@ def post_merge(lo, hi, size):
return lo, hi


def classify_struct(typ, allocator=None, return_classification=False):
return classify_aggregate(typ, allocator, return_classification, "Struct")
def classify_struct(typ, allocator=None, return_classification=False, types=None):
return classify_aggregate(
typ, allocator, return_classification, "Struct", types=types
)


def classify_class(typ, allocator=None, return_classification=False):
return classify_aggregate(typ, allocator, return_classification, "Class")
def classify_class(typ, allocator=None, return_classification=False, types=None):
return classify_aggregate(
typ, allocator, return_classification, "Class", types=types
)


def classify_aggregate(
typ, allocator=None, return_classification=False, aggregate="Struct"
typ,
allocator=None,
return_classification=False,
aggregate="Struct",
types=None,
):
size = typ.get("size", 0)
global types
types = types or {}

# If an object is larger than eight eightbytes (i.e., 64 bytes), classify it as MEMORY.
# Note there is a double check here because we don't have faith in the size field
Expand Down Expand Up @@ -364,15 +364,16 @@ def classify_union(typ, allocator):
return Classification("Union", RegisterClass.MEMORY)


def classify_array(typ, allocator):
def classify_array(typ, allocator, types=None):
size = typ.get("size", 0)
global types
types = types or {}

# If size > 64 or unaligned fields, class memory
if size > 64:
return Classification("Array", RegisterClass.MEMORY)

typename = typ.get("type")
# Array has underlying type
typename = typ.get("underlying_type", {}).get("type")
classname = None

# regular class id or pointer
Expand Down
3 changes: 0 additions & 3 deletions cle/backends/elf/types.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from elftools.dwarf.die import DIE

# Global cache of types
types = {}

class ClassType:
types = {
"int": "Integer",
Expand Down
2 changes: 0 additions & 2 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
examples_dir = os.path.abspath(args[-1])
else:
examples_dir = os.path.join(here, "examples")
sys.path.insert(0, here)


sys.path.insert(0, here)

Expand Down

0 comments on commit 9b2186f

Please sign in to comment.