Skip to content

Commit

Permalink
Working on a better reversing order
Browse files: browse the repository at this point in the history
  • Loading branch information
trolldbois committed Oct 14, 2015
1 parent 411a5e9 commit 2c41e06
Show file tree
Hide file tree
Showing 16 changed files with 632 additions and 337 deletions.
91 changes: 39 additions & 52 deletions haystack/reverse/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,14 @@
from haystack.reverse import matchers
from haystack.reverse import enumerators

"""
This is a controller to parse allocated chunk from memory and
guess/reverse the record and its field member types.
"""


log = logging.getLogger('context')


class ReverserContext(object):
"""
TODO: Change Name to MmapReverserContext
add methods for chained mmap
Add check for context, only on valid heaps ( getHeaps)
The ReverserContext is a stateful instance around a Heap.
The context contains cache helpers around the reversing of records.
"""

def __init__(self, memory_handler, heap):
Expand All @@ -39,8 +33,6 @@ def __init__(self, memory_handler, heap):
self.dumpname = memory_handler.get_name()
self.heap = heap
self._heap_start = heap.start
self._word_size = self.memory_handler.get_target_platform().get_word_size()
self.parsed = set()
self._function_names = dict()
# refresh heap pointers list and allocators chunks
self._reversedTypes = dict()
Expand All @@ -49,6 +41,7 @@ def __init__(self, memory_handler, heap):
return

def _init2(self):
log.info('[+] ReverserContext on heap 0x%x', self.heap.get_marked_heap_address())
# Check that cache folder exists
if not os.access(config.get_cache_folder_name(self.dumpname), os.F_OK):
os.mkdir(config.get_cache_folder_name(self.dumpname))
Expand Down Expand Up @@ -80,29 +73,29 @@ def _is_record_cache_dirty(self):
return self._structures is None or len(self._structures) != len(self._structures_addresses)

# TODO implement a LRU cache
def _get_structures(self):
def _list_records(self):
if not self._is_record_cache_dirty():
return self._structures

# otherwise, load the records from the cache
log.info('[+] Loading cached structures list')
log.info('[+] Loading cached records list')
self._structures = dict(
[(long(vaddr), s) for vaddr, s in structure.cache_load_all_lazy(self)])
log.info('[+] Loaded %d cached structures addresses from disk', len(self._structures))
log.info('[+] Loaded %d cached records addresses from disk', len(self._structures))

# If some structures are still missing after loading the cache,
# then recreate them in the cache from allocated memory
nb_missing = len(self._structures_addresses) - len(self._structures)
if nb_missing != 0:
import reversers

log.info('[+] Missing cached structures %d' % nb_missing)
log.info('[+] Missing cached records %d' % nb_missing)
if nb_missing < 10:
log.warning('TO check missing:%d unique: %d', nb_missing, len(set(self._structures_addresses) - set(self._structures)))
# use BasicCachingReverser to get user blocks
cache_reverse = reversers.BasicCachingReverser(self)
_ = cache_reverse.reverse()
log.info('[+] Built %d/%d structures from allocations',
log.info('[+] Built %d/%d records from allocations',
len(self._structures),
len(self._structures_addresses))
return self._structures
Expand All @@ -120,7 +113,7 @@ def get_record_size_for_address(self, addr):
def get_record_count(self):
if self._is_record_cache_dirty():
# refresh the cache
return len(self._get_structures())
return len(self._list_records())
return len(self._structures_addresses)

def get_record_address_at_address(self, _address):
Expand Down Expand Up @@ -151,7 +144,7 @@ def get_record_for_address(self, addr):
:param addr:
:return:
"""
return self._get_structures()[addr]
return self._list_records()[addr]

def listOffsetsForPointerValue(self, ptr_value):
'''Returns the list of offsets where this value has been found'''
Expand All @@ -169,7 +162,7 @@ def listStructuresAddrForPointerValue(self, ptr_value):

def listStructuresForPointerValue(self, ptr_value):
'''Returns the list of structures with a member with this pointer value '''
return [self._get_structures()[addr]
return [self._list_records()[addr]
for addr in self.listStructuresAddrForPointerValue(ptr_value)]

def list_allocations_addresses(self):
Expand All @@ -179,10 +172,10 @@ def list_allocations_sizes(self):
return map(long, self._structures_sizes)

def listStructuresAddresses(self):
return map(long, self._get_structures().keys())
return map(long, self._list_records().keys())

def listStructures(self):
return self._get_structures().values()
return self._list_records().values()

def is_known_address(self, address):
return address in self._structures_addresses
Expand All @@ -199,6 +192,12 @@ def listReversedTypes(self):
return self._reversedTypes.values()

# name of cache files
def get_folder_cache(self):
return config.get_cache_folder_name(self.dumpname)

def get_folder_cache_structures(self):
return config.get_record_cache_folder_name(self.dumpname)

def get_filename_cache_context(self):
return config.get_cache_filename(config.CACHE_CONTEXT, self.dumpname, self._heap_start)

Expand All @@ -208,9 +207,6 @@ def get_filename_cache_headers(self):
def get_filename_cache_graph(self):
return config.get_cache_filename(config.CACHE_GRAPH, self.dumpname, self._heap_start)

def get_folder_cache_structures(self):
return config.get_record_cache_folder_name(self.dumpname)

def get_filename_cache_pointers_addresses(self):
return config.get_cache_filename(config.CACHE_HEAP_ADDRS, self.dumpname, self._heap_start)

Expand All @@ -223,9 +219,12 @@ def get_filename_cache_allocations_addresses(self):
def get_filename_cache_allocations_sizes(self):
return config.get_cache_filename(config.CACHE_MALLOC_CHUNKS_SIZES, self.dumpname, self._heap_start)

def get_filename_cache_signatures(self):
return config.get_cache_filename(config.CACHE_SIGNATURE_GROUPS_DIR, self.dumpname, self._heap_start)

def get_heap_pointers(self):
"""
UNUSED
@UNUSED
Search Heap pointers values in stack and heap.
records values and pointers address in heap.
Expand All @@ -235,8 +234,9 @@ def get_heap_pointers(self):
"""
feedback = searchers.NoFeedback()
matcher = matchers.PointerEnumerator(self.memory_handler)
enumerator = enumerators.WordAlignedEnumerator(self.heap, matcher, feedback, self._word_size)
return utils._get_cache_heap_pointers(self, enumerator)
word_size = self.memory_handler.get_target_platform().get_word_size()
enumerator = enumerators.WordAlignedEnumerator(self.heap, matcher, feedback, word_size)
return utils.get_cache_heap_pointers(self, enumerator)

def get_heap_pointers_from_allocated(self, heap_walker):
"""
Expand All @@ -249,8 +249,9 @@ def get_heap_pointers_from_allocated(self, heap_walker):
"""
feedback = searchers.NoFeedback()
matcher = matchers.PointerEnumerator(self.memory_handler)
enumerator = enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, self._word_size)
return utils._get_cache_heap_pointers(self, enumerator)
word_size = self.memory_handler.get_target_platform().get_word_size()
enumerator = enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, word_size)
return utils.get_cache_heap_pointers(self, enumerator)

@classmethod
def cacheLoad(cls, memory_handler, heap_addr):
Expand All @@ -277,7 +278,7 @@ def cacheLoad(cls, memory_handler, heap_addr):

def save(self):
# we only need dumpfilename to reload _memory_handler, addresses to reload
# cached structures
# cached records
cache_context_filename = self.get_filename_cache_context()
try:
with file(cache_context_filename, 'w') as fout:
Expand Down Expand Up @@ -308,38 +309,23 @@ def reset(self):
def __getstate__(self):
"""The important things to pickle are:
dumpname
parsed
_heap_start
Ignore the rest
"""
# FIXME, double check and delete
#d = self.__dict__.copy()
#del d['_memory_handler']
#del d['heap']
#del d['_structures']
#del d['_structures_addresses']
##del d['_pointers_values']
##del d['_pointers_offsets']
#del d['_malloc_addresses']
#del d['_malloc_sizes']
d = dict()
d['dumpname'] = self.__dict__['dumpname']
d['parsed'] = self.__dict__['parsed']
d['_heap_start'] = self.__dict__['_heap_start']
d['_word_size'] = self.__dict__['_word_size']
return d

def __setstate__(self, d):
self.dumpname = d['dumpname']
self.parsed = d['parsed']
self._heap_start = d['_heap_start']
self._word_size = d['_word_size']
self._structures = None
self._function_names = dict()
return

def save_structures(self):
tl = time.time()
t0 = time.time()
if self._structures is None:
log.debug('No loading has been done, not saving anything')
return
Expand All @@ -350,13 +336,14 @@ def save_structures(self):
except KeyboardInterrupt as e:
os.remove(s.fname)
raise e
if time.time() - tl > 30: # i>0 and i%10000 == 0:
t0 = time.time()
log.info('\t\t - %2.2f secondes to go ', (len(self._structures) - i) * ((tl - t0) / i))
tl = t0
if time.time() - t0 > 30: # i>0 and i%10000 == 0:
tl = time.time()
rate = (tl - t0) / (1 + i)
_ttg = (len(self._structures) - i) * rate
log.info('\t\t - %2.2f seconds to go', _ttg)
t0 = tl
tf = time.time()
log.info('\t[.] saved in %2.2f secs' % (tf - tl))

log.info('\t[.] saved in %2.2f secs' % (tf - t0))


# FIXME - get context should be on memory_handler.
Expand All @@ -371,13 +358,13 @@ def get_context(fname, heap_addr):
ctx = ReverserContext.cacheLoad(memory_handler, heap_addr)
except IOError as e:
finder = memory_handler.get_heap_finder()
# force generation of heaps.
heaps = finder.get_heap_mappings()
heap = memory_handler.get_mapping_for_address(heap_addr)
ctx = ReverserContext(memory_handler, heap)
return ctx



def get_context_for_address(memory_handler, address):
"""
Returns the haystack.reverse.context.ReverserContext of the process
Expand Down
33 changes: 18 additions & 15 deletions haystack/reverse/fieldtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def makeStructField(cls, parent, offset, fields):
import structure
_address = parent.address + offset
newfieldType = FieldTypeStruct('%lx' % _address, fields)
newfieldType.setStruct(structure.AnonymousRecord(parent.context, _address, len(newfieldType)))
newfieldType.setStruct(structure.AnonymousRecord(parent._memory_handler, _address, len(newfieldType)))
newField = Field(parent, offset, newfieldType, len(newfieldType), False)
return newField

Expand All @@ -90,7 +90,7 @@ class FieldTypeStruct(FieldType):
"""

def __init__(self, name, fields):
super(FieldTypeStruct, self).__init__(0x1, 'struct', name, 'K', isPtr=False)
super(FieldTypeStruct, self).__init__(self, 0x1, 'struct', name, 'K', isPtr=False)
self.size = sum([len(f) for f in fields])
self.elements = fields
# TODO s2[0].elements[0].typename.elements[0] is no good
Expand All @@ -106,9 +106,12 @@ def __len__(self):


class FieldTypeArray(FieldType):

"""
An array type
"""
def __init__(self, basicTypeName):
FieldType.__init__(self, 0x60, 'array_%s' % basicTypeName, None, 'a', isPtr=False)
super(FieldTypeArray, self).__init__(self, 0x60, 'array_%s' % basicTypeName, None, 'a', isPtr=False)


# set up all the known types that are interesting to us
FieldType.UNKNOWN = FieldType(0x0, 'untyped', 'ctypes.c_ubyte', ctypes.c_ubyte, 'u')
Expand Down Expand Up @@ -188,7 +191,7 @@ def get_ctype(self):
return self._ctype
# FIXME TODO

def getTypename(self):
def get_typename(self):
if self.is_string() or self.is_zeroes():
return '%s * %d' % (self.typename.ctypes, len(self))
elif self.is_array():
Expand Down Expand Up @@ -287,19 +290,16 @@ def _getValue(self, maxLen):
elif self.is_zeroes():
bytes = repr(self.value) # '\\x00'*len(self)
elif self.is_array():
log.warning('ARRAY in Field type, %s' % self.typename)
log.error(
'error in 0x%x offset 0x%x' %
(self.struct.address, self.offset))
bytes = ''.join(
['[', ','.join([el.to_string() for el in self.elements]), ']'])
log.warning('ARRAY in Field type, %s', self.typename)
log.error('error in 0x%x offset 0x%x', self.struct.address, self.offset)
bytes = ''.join(['[', ','.join([el.to_string() for el in self.elements]), ']'])
elif self.padding or self.typename == FieldType.UNKNOWN:
bytes = self.struct.bytes[self.offset:self.offset + len(self)]
else: # bytearray, pointer...
return self.value
return bytes

def getSignature(self):
def get_signature(self):
return (self.typename, self.size)

def to_string(self, prefix=''):
Expand Down Expand Up @@ -329,7 +329,7 @@ def to_string(self, prefix=''):
self.comment, self.usercomment, repr(self.getValue(config.commentMaxSize)))

fstr = "%s( '%s' , %s ), %s\n" % (
prefix, self.get_name(), self.getTypename(), comment)
prefix, self.get_name(), self.get_typename(), comment)
return fstr

def __getstate__(self):
Expand Down Expand Up @@ -397,6 +397,9 @@ def __init__(self, astruct, elements):
self.basicTypename = elements[0].typename

self.size = self.basicTypeSize * len(self.elements)

super(ArrayField, self).__init__(astruct, self.offset, self.typename, self.size, False)

self.padding = False
self.value = None
self.comment = ''
Expand All @@ -409,7 +412,7 @@ def is_array(self):
def get_ctype(self):
return self._ctype

def getTypename(self):
def get_typename(self):
return '%s * %d' % (self.basicTypename.ctypes, self.nbElements)

def _getValue(self, maxLen):
Expand All @@ -428,7 +431,7 @@ def to_string(self, prefix):
comment = '# %s %s array:%s' % (
self.comment, self.usercomment, self.getValue(config.commentMaxSize))
fstr = "%s( '%s' , %s ), %s\n" % (
prefix, self.get_name(), self.getTypename(), comment)
prefix, self.get_name(), self.get_typename(), comment)
return fstr


Expand Down

0 comments on commit 2c41e06

Please sign in to comment.