Skip to content

Commit

Permalink
more reverse
Browse files Browse the repository at this point in the history
  • Loading branch information
trolldbois committed Oct 28, 2015
1 parent 7d3833a commit ee9de26
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 38 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Changes to architecture.txt:
2015-10:
- cleaning up the reverse API
- Fix sigsegv bug source in reset_mappings()
- Fix the double linked reverser
- Cleaned up the reverse/structure module
- Added a fun constraints heuristic on the fields of reversed types.

2015-09:
- Working on Volatility dump reader
Expand Down
1 change: 0 additions & 1 deletion haystack/mappings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
log = logging.getLogger('memorybase')



class AMemoryMapping(interfaces.IMemoryMapping):

"""
Expand Down
11 changes: 6 additions & 5 deletions haystack/reverse/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,27 @@
SIGNATURES_FILENAME = 'signatures'
WORDS_FOR_REVERSE_TYPES_FILE = 'data/words.100'


def create_cache_folder_name(dumpname):
    """Create the cache folder of ``dumpname`` unless it is already present.

    The folder path is the one given by get_cache_folder_name().
    Nothing is returned.
    """
    cache_dir = get_cache_folder_name(dumpname)
    if os.access(cache_dir, os.F_OK):
        # the folder is already there, nothing to create
        return
    os.mkdir(cache_dir)


def remove_cache_folder(dumpname):
    """Delete the cache folder of ``dumpname`` and its content, if it exists.

    The folder path is the one given by get_cache_folder_name().
    Nothing is returned.
    """
    cache_dir = get_cache_folder_name(dumpname)
    if not os.access(cache_dir, os.F_OK):
        # no cache folder, nothing to remove
        return
    shutil.rmtree(cache_dir)


def get_cache_folder_name(dumpname):
    """Return the absolute path of ``dumpname`` joined to CACHE_NAME with the path separator."""
    return os.path.sep.join((os.path.abspath(dumpname), CACHE_NAME))

def get_cache_filename(typ, dumpname, address):

def get_cache_filename(typ, dumpname, address=None):
"""
Returns a filename for caching a type of data based on the dump filename.
Expand All @@ -70,12 +74,9 @@ def get_cache_filename(typ, dumpname, address):
fname = typ
if address is not None:
fname = '%x.%s' % (address, typ)
else:
log.warning('usage without uuid')
import pdb
pdb.set_trace()
return os.path.sep.join([get_cache_folder_name(dumpname), fname])


def get_record_cache_folder_name(dumpname):
"""
Returns a dirname for caching the structures based on the dump filename.
Expand Down
41 changes: 38 additions & 3 deletions haystack/reverse/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,43 @@ def __init__(self, memory_handler):
# create the cache folder then
self.reset_cache_folder()

# def get_functions_pointers(self):
# try:
# return self.get_cache_radare()
# except IOError as e:
# return self.save_cache_radare()
#
# def get_cache_radare(self):
# dumpname = self.memory_handler.get_name()
# fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname)
# functions = None
# try:
# with file(fname, 'r') as fin:
# functions = pickle.load(fin)
# except EOFError as e:
# os.remove(fname)
# log.error('Error in the radare cache file. File cleaned. Please restart.')
# raise RuntimeError('Error in the radare cache file. File cleaned. Please restart.')
# return functions
#
# def save_cache_radare(self):
# from haystack.reverse.heuristics import radare
# func = radare.RadareAnalysis(self.memory_handler)
# func.init_all_functions()
# import code
# code.interact(local=locals())
# dumpname = self.memory_handler.get_name()
# fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname)
# with file(fname, 'w') as fout:
# pickle.dump(func.functions, fout)
# return func.functions

def reset_cache_folder(self):
"""Removes the cache folder"""
dumpname = self.memory_handler.get_name()
# create the cache folder
cache_folder = config.get_cache_folder_name(dumpname)
config.remove_cache_folder(self.memory_handler.get_name())
# config.remove_cache_folder(self.memory_handler.get_name())
if not os.access(cache_folder, os.F_OK):
os.mkdir(cache_folder)
log.info("[+] Cache created in %s", cache_folder)
Expand All @@ -64,7 +95,7 @@ def get_context_for_heap(self, heap):
if not isinstance(heap, interfaces.IMemoryMapping):
raise TypeError('heap should be a IMemoryMapping')
if not heap.is_marked_as_heap():
raise TypeError('heap should be a heap')
raise TypeError('heap should be a heap: %s', heap)
if heap.get_marked_heap_address() not in self.__contextes:
heap_context = self.make_context_for_heap(heap)
self._set_context_for_heap(heap, heap_context)
Expand All @@ -84,7 +115,7 @@ def add_reversed_type(self, typename, t):
self.__reversed_types[typename] = t

def list_reversed_types(self):
    """Return the names under which reversed types have been registered.

    These are the keys stored by add_reversed_type(); use them with
    get_reversed_type() to fetch the associated value.
    """
    # a stale 'return ...values()' used to precede this line and made it unreachable
    return self.__reversed_types.keys()

# was get_context
def make_context_for_heap(self, heap):
Expand All @@ -102,6 +133,10 @@ def make_context_for_heap(self, heap):
ctx = HeapContext(self.memory_handler, heap)
return ctx

def get_filename_cache_headers(self):
    """Return the cache filename keyed by config.CACHE_GENERATED_PY_HEADERS_VALUES for this dump.

    The dump name is read from the memory handler; no address is given to
    config.get_cache_filename().
    """
    return config.get_cache_filename(
        config.CACHE_GENERATED_PY_HEADERS_VALUES,
        self.memory_handler.get_name())


class HeapContext(object):
"""
Expand Down
10 changes: 9 additions & 1 deletion haystack/reverse/heuristics/dsa.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ def make_fields(self, structure, offset, size):
size -= self._word_size
offset += self._word_size
continue
# 20151026 - if the value is not word-aligned, ignore it
if value % self._target.get_word_size():
size -= self._word_size
offset += self._word_size
continue
# we have a pointer
log.debug('checkPointer offset:%s value:%s' % (offset, hex(value)))
field = fieldtypes.PointerField('ptr_%d' % offset, offset, self._word_size)
Expand Down Expand Up @@ -238,7 +243,10 @@ class FieldReverser(model.AbstractReverser):
IntegerFields: if the word value is small ( |x| < 65535 )
PointerFields: if the word is a possible pointer value
If the word content does not match theses heuristics, tag the fiel has unknown.
If the word content does not match these heuristics, tag the field as unknown.
TODO: UTF16 array corrector, if utf16 field is preceded by smallint, aggregate both in utf16,
even if not aligned.
"""
REVERSE_LEVEL = 10

Expand Down
9 changes: 9 additions & 0 deletions haystack/reverse/heuristics/pointertypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from haystack.reverse import context
from haystack.reverse.heuristics import model
from haystack.reverse.heuristics import radare

log = logging.getLogger("pointertypes")

Expand All @@ -20,6 +21,11 @@ class PointerFieldReverser(model.AbstractReverser):
"""
REVERSE_LEVEL = 50

def __init__(self, _memory_handler):
    """Initialise the reverser by delegating to the parent class constructor.

    :param _memory_handler: passed unchanged to the parent constructor
    """
    super(PointerFieldReverser, self).__init__(_memory_handler)
    # Function pointers lookup is disabled for now:
    # process_context = self._memory_handler.get_reverse_context()
    # self.__functions_pointers = process_context.get_functions_pointers()

def reverse_record(self, _context, _record):
"""
@returns structure, with enriched info on pointer fields.
Expand Down Expand Up @@ -55,6 +61,9 @@ def reverse_record(self, _context, _record):
field.set_pointee_ctype('void')
# TODO: Function pointer ?
field.name = 'ptr_ext_lib_%d' % field.offset
# if value in self.__functions_pointers:
# size, bbs, name = self.__functions_pointers[value]
# field.name = 'func_ptr_%s_%d' % (name, field.offset)
continue
tgt = None
try:
Expand Down
79 changes: 69 additions & 10 deletions haystack/reverse/heuristics/reversers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from haystack.abc import interfaces
from haystack.reverse import config
from haystack.reverse import context
from haystack.reverse import structure
from haystack.reverse import fieldtypes
from haystack.reverse import utils
Expand Down Expand Up @@ -76,7 +77,7 @@ def reverse_context(self, _context):
#
self._todo = sorted(set(self._allocations) - set(self._done_records))
self._fromcache = len(self._allocations) - len(self._todo)
log.info('[+] Adding new raw structures from getUserAllocations cached contents - %d todo', len(self._todo))
log.info('[+] Adding new raw structures from user allocations - %d todo', len(self._todo))
super(BasicCachingReverser, self).reverse_context(_context)

def reverse_record(self, _context, _record):
Expand Down Expand Up @@ -388,10 +389,11 @@ def rename_record_type(self, _members, offset):
heap = self._memory_handler.get_mapping_for_address(list_item_addr)
_context = self._process_context.get_context_for_heap(heap)
_item = _context.get_record_for_address(list_item_addr)
### KEEP THIS
if len(_item) != len(_record):
log.warning("x2 linked reverser: len(_item) != len(_record)")
else:
_item.set_record_type(_record_type)
_item.set_record_type(_record_type, True)

# push the LIST_ENTRY type into the context/memory_handler
rev_context = self._memory_handler.get_reverse_context()
Expand All @@ -411,6 +413,15 @@ def debug_lists(self):
for _list in res:
log.debug("%s items:\t[%s]", len(_list), ','.join([hex(addr) for addr in _list]))

def rename_all_lists(self):
    """Call rename_record_type() on every members list held in self.lists.

    self.lists is walked as two nested mappings; the outer key is not used
    here, the inner key is the offset handed to rename_record_type().
    """
    for _size, lists_by_offset in self.lists.items():
        for offset, member_lists in lists_by_offset.items():
            for members in member_lists:
                # the count is taken before the rename call, as in the original ordering
                count = len(members)
                record_type = self.rename_record_type(members, offset)
                log.debug('%d members for : %s', count, record_type.to_string())


class PointerGraphReverser(model.AbstractReverser):
"""
Expand Down Expand Up @@ -474,7 +485,11 @@ def reverse_record(self, heap_context, _record):
self._master_graph.add_edge(hex(_record.address), hex(pointee_addr))
# but we only feed the heaps graph if the target is known
heap = self._memory_handler.get_mapping_for_address(pointee_addr)
heap_context = self._memory_handler.get_reverse_context().get_context_for_heap(heap)
try:
heap_context = context.get_context_for_address(self._memory_handler, pointee_addr)
except ValueError as e:
continue
#heap_context = self._memory_handler.get_reverse_context().get_context_for_heap(heap)
if heap_context is None:
continue
try:
Expand All @@ -490,6 +505,10 @@ def reverse_record(self, heap_context, _record):
class ArrayFieldsReverser(model.AbstractReverser):
"""
Aggregate fields of similar type into arrays in the record.
Check d4008 in zeus. nice array
d2008 is a major player
90688 too
"""
REVERSE_LEVEL = 200

Expand Down Expand Up @@ -660,6 +679,38 @@ def refreshOne(context, ptr_value):
return mystruct


def save_process_headers(memory_handler):
    """
    Save the python class code definition of the process reversed types to file.

    For every type name listed by the process reverse context, the lines
    returned by ConstraintsReverser.verify() are written, followed by a
    '# N members' comment and the output of the type's to_string().
    The target file is the one named by get_filename_cache_headers().

    :param memory_handler: the handler giving access to the reverse context
    :return: None
    """
    # function-scope import kept from the original code, now done only once
    from haystack.reverse.heuristics import constraints
    process_context = memory_handler.get_reverse_context()
    log.info('[+] saving headers for process')
    towrite = []
    # the with statement closes the file even when a reverser raises
    with open(process_context.get_filename_cache_headers(), 'w') as fout:
        for r_type in process_context.list_reversed_types():
            members = process_context.get_reversed_type(r_type)
            # NOTE(review): one reverser per type, as before - hoist it out of
            # the loop only if ConstraintsReverser is confirmed to be stateless
            rev = constraints.ConstraintsReverser(memory_handler)
            txt = rev.verify(r_type, members)
            towrite.extend(txt)
            towrite.append("# %d members" % len(members))
            towrite.append(r_type.to_string())
            if len(towrite) >= 10000:
                try:
                    # trailing newline so that the next batch does not get
                    # glued to the last line of this one
                    fout.write('\n'.join(towrite) + '\n')
                except UnicodeDecodeError as e:
                    # best effort: the batch is dropped, but say so in the logs
                    log.error('ERROR on %s: %s', r_type, e)
                towrite = []
                fout.flush()
        # remaining lines; errors are not swallowed here, as in the original
        fout.write('\n'.join(towrite))


def save_headers(ctx, addrs=None):
"""
Save the python class code definition to file.
Expand Down Expand Up @@ -700,18 +751,20 @@ def reverse_heap(memory_handler, heap_addr):
:return:
"""
from haystack.reverse import context
log.debug('[+] Loading the memory dump for HEAP 0x%x', heap_addr)
log.info('[+] Loading the memory dump for HEAP 0x%x', heap_addr)
ctx = context.get_context_for_address(memory_handler, heap_addr)
try:
# decode bytes contents to find basic types.
log.debug('Reversing Fields')
log.info('Reversing Fields')
fr = dsa.FieldReverser(memory_handler)
fr.reverse_context(ctx)

# try to find some logical constructs.
log.debug('Reversing DoubleLinkedListReverser')
log.info('Reversing DoubleLinkedListReverser')
# why is this a reverse_context ?
doublelink = DoubleLinkedListReverser(memory_handler)
doublelink.reverse_context(ctx)
doublelink.rename_all_lists()

# save to file
save_headers(ctx)
Expand All @@ -735,9 +788,9 @@ def reverse_instances(memory_handler):
:return:
"""
assert isinstance(memory_handler, interfaces.IMemoryHandler)
if True:
if False:
# decode bytes contents to find basic types.
log.debug('Reversing Fields')
log.info('Reversing Fields')
fr = dsa.FieldReverser(memory_handler)
fr.reverse()
# try to find some logical constructs.
Expand All @@ -754,21 +807,27 @@ def reverse_instances(memory_handler):

# then and only then can we look at the PointerFields
# identify pointer relation between structures
log.debug('Reversing PointerFields')
log.info('Reversing PointerFields')
pfr = pointertypes.PointerFieldReverser(memory_handler)
pfr.reverse()

# TODO save process type record

# save that
for heap in heaps:
ctx = memory_handler.get_reverse_context().get_context_for_heap(heap)
ctx.save_structures()
# save to file
save_headers(ctx)

save_process_headers(memory_handler)

# and then
# graph pointer relations between structures
log.debug('Reversing PointerGraph')
log.info('Reversing PointerGraph')
ptrgraph = PointerGraphReverser(memory_handler)
ptrgraph.reverse()

# todo save graph method

return
Loading

0 comments on commit ee9de26

Please sign in to comment.