Skip to content

Commit

Permalink
still working on windows heaps
Browse files Browse the repository at this point in the history
  • Loading branch information
trolldbois committed Aug 28, 2015
1 parent 7edc72d commit 01b61e9
Show file tree
Hide file tree
Showing 35 changed files with 498 additions and 300 deletions.
9 changes: 9 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ Changes to architecture.txt:
- Ran 225 tests in 519.059s
OK (skipped=7, expected failures=4)
- Profiling... Ran 225 tests in 160.300s
- heaps are not necessarily at offset 0 in mapping
- more work on Windows XP heap based on the Zeus vmem extract of a process
- cleaned the memory handler to stop being a big bag of everything
- remove reverse's context from core haystack
- added a script to find heaps in Windows dumps
- start standardizing test dumps in a test file inventory
Ran 240 tests in 119.036s
FAILED (failures=2, errors=9, skipped=11, expected failures=3)




Expand Down
11 changes: 11 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@

- following memory_handler changes, the __book cache seems to forget about some ctypes buffers.
not cool. on get_context.
Function getRef deactivated. It doesn't bug anymore.
It impacts performance _a lot_ to deactivate the cache.
To be obsoleted ?
But output_to_python depends on it... TODO


- use PEB search to double check that we find all HEAPs in standard scenarios.
- orient winheap to work with heap, not with heap aligned on mapping.start.

- FIX winXP x86 heapwalker, then work on vol.

- use pycallgraph to cProfile a HEAP validation.
Expand Down
29 changes: 10 additions & 19 deletions haystack/abc/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ def __contains__(self, address):
def __len__(self):
raise NotImplementedError(self)

def mark_as_heap(self, addr):
    """Flag this memory mapping as a heap, recording addr as the heap address."""
    raise NotImplementedError(self)

def is_marked_as_heap(self):
    """Returns True if this memory mapping has been marked as a heap."""
    raise NotImplementedError(self)

def get_marked_heap_address(self):
    """Returns the heap address recorded when this mapping was marked as a heap."""
    raise NotImplementedError(self)

class IMemoryLoader(object):
"""Parse a process memory _memory_handler from a storage concept,
Expand All @@ -108,6 +116,7 @@ def get_name(self):
"""Returns the name of the process memory dump we are analysing"""
raise NotImplementedError(self)

# helper methods that do not impact the internals
def get_target_platform(self):
"""Returns the ITargetPlatform for that process memory."""
raise NotImplementedError(self)
Expand All @@ -116,10 +125,6 @@ def get_heap_finder(self):
"""Returns the IHeapFinder for that process memory."""
raise NotImplementedError(self)

def get_heap_walker(self, heap):
"""Returns the IHeapWalker for that process memory."""
raise NotImplementedError(self)

def get_ctypes_utils(self):
"""Returns the Utils toolkit."""
raise NotImplementedError(self)
Expand All @@ -128,7 +133,7 @@ def get_model(self):
"""Returns the Model cache."""
raise NotImplementedError(self)

# helper methods that should maybe be moved
# class proper methods
def get_mappings(self):
"""
return the list of IMemoryMapping
Expand All @@ -151,20 +156,6 @@ def iter_mapping_with_name(self, pathname):
"""Returns the IMemoryMapping _memory_handler with the name pathname"""
raise NotImplementedError(self)

# MOVE TO HEAP FINDER
def get_heap(self):
"""Returns the first IMemoryMapping heaps"""
raise NotImplementedError(self)

def get_heaps(self):
"""Returns all IMemoryMapping heaps"""
raise NotImplementedError(self)

def get_stack(self):
"""Returns the IMemoryMapping identified as the stack"""
raise NotImplementedError(self)
# MOVE TO HEAP FINDER

def is_valid_address(self, obj, structType=None):
    """Return true if the virtual address is a valid address in a IMemoryMapping"""
    raise NotImplementedError(self)
Expand Down
4 changes: 2 additions & 2 deletions haystack/basicmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def _load_member(self, record, attr, attrname, attrtype, record_constraints, max
'should not happen')
return False
ref = self._memory_handler.getRef(self._ctypes.CString, attr_obj_address)
if ref:
if ref is not None:
log.debug("%s %s loading from references cache %s/0x%lx", attrname,
attr, self._ctypes.CString, attr_obj_address)
return True
Expand Down Expand Up @@ -482,7 +482,7 @@ def _load_member(self, record, attr, attrname, attrtype, record_constraints, max
return True

ref = self._memory_handler.getRef(_attrType, attr_obj_address)
if ref:
if ref is not None:
log.debug(
"%s %s loading from references cache %s/0x%lx",
attrname, attr, _attrType, attr_obj_address)
Expand Down
7 changes: 4 additions & 3 deletions haystack/gui/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,15 +459,16 @@ def openDump(self):

def _openDump(self, dumpname):
    """Load a memory dump and open a memory tab on its first heap mapping.

    :param dumpname: name of the dump, handed to dump_loader.load().
    :return: the value returned by make_memory_tab() for the new tab.
    """
    # load the memory handler for that dump
    memory_handler = dump_loader.load(dumpname)
    # TODO : make a mapping chooser
    # Heap mappings are listed by the heap finder, not by the memory
    # handler itself, so the lookup has to go through the finder.
    finder = memory_handler.get_heap_finder()
    heap = finder.get_heap_mappings()[0]
    # tab title is "<dump basename><sep><heap pathname>"
    tab_name = os.path.sep.join([os.path.basename(dumpname), heap.pathname])
    return self.make_memory_tab(tab_name, heap, memory_handler)

def closeTab(self):
self.tabWidget.removeTab(self.tabWidget.currentIndex())
Expand Down
10 changes: 9 additions & 1 deletion haystack/listmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,9 @@ def _iterate_list_from_field_inner(self, iterator_fn, head, pointee_record_type,
# use cache if possible, avoid loops.
st = self._memory_handler.getRef(pointee_record_type, list_member_address)
# st._orig_address_ = link
if st:
if st is not None:
# we return
log.debug('_iterate_list_from_field_inner getRef returned cached value on 0x%x', list_member_address)
yield st
else:
memoryMap = self._memory_handler.is_valid_address_value(list_member_address, pointee_record_type)
Expand All @@ -404,6 +405,8 @@ def _iterate_double_linked_list(self, record, sentinels=None):
"""
# stop when Null
done = [0]
if sentinels is None:
sentinels = []
obj = record
record_type = type(record)
# we ignore the sentinels here, as this is an internal iterator
Expand All @@ -424,6 +427,10 @@ def _iterate_double_linked_list(self, record, sentinels=None):
st._orig_address_ = addr
self._memory_handler.keepRef(st, record_type, addr)
log.debug("keepRefx2 %s.%s: @%x", record_type.__name__, fieldname, addr)
## DEBUG
#import traceback
#print traceback.print_stack()
## DEBUG
yield addr
# next
link = getattr(st, fieldname)
Expand Down Expand Up @@ -579,6 +586,7 @@ def _load_list_entries(self, record, link_iterator, max_depth):

for list_member in link_iterator:
# load the list entry structure members
log.debug('send %s to load_members' % list_member)
if not self.load_members(list_member, max_depth - 1):
log.error(
'Error while loading members on %s',
Expand Down
100 changes: 27 additions & 73 deletions haystack/mappings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def __init__(self, start, end, permissions, offset,
self.minor_device = minor_device
self.inode = inode
self.pathname = str(pathname) # fix None
self._is_heap = False
self._is_heap_addr = None

def get_target_platform(self):
"""
Expand Down Expand Up @@ -177,6 +179,16 @@ def _ptov(self, paddr):
vaddr = paddr - pstart
return vaddr

def mark_as_heap(self, addr):
    """Flag this mapping as a heap and remember addr as the heap address."""
    self._is_heap, self._is_heap_addr = True, addr

def is_marked_as_heap(self):
    """Return True only when the heap flag of this mapping is exactly True."""
    heap_flag = self._is_heap
    return heap_flag is True

def get_marked_heap_address(self):
    """Return the heap address stored on this mapping (None until one is set)."""
    heap_address = self._is_heap_addr
    return heap_address

# ---- to implement if needed
def read_word(self, address):
raise NotImplementedError(self)
Expand Down Expand Up @@ -216,20 +228,14 @@ def __init__(self, mapping_list, _target, name):
m.set_target_platform(self._target)
self._utils = self._target.get_target_ctypes_utils()
self.__name = name
# FIXME book keeper move to context
# book register to keep references to ctypes memory buffers
self.__book = _book()
self.__model = model.Model(self)
# FIXME reduce open files.
self.__required_maps = []
# finish initialization
self._heap_finder = self._set_heap_finder()
self._heap_finder = None
self.__optim_get_mapping_for_address()
self.__optim_heaps = None

def _set_heap_finder(self):
"""set the IHeapFinder for that process memory."""
return heapwalker.make_heap_finder(self)

def get_name(self):
"""Returns the name of the process memory dump we are analysing"""
Expand All @@ -241,14 +247,10 @@ def get_target_platform(self):

def get_heap_finder(self):
    """Returns the IHeapFinder for that process memory.

    The finder is created on first use through heapwalker.make_heap_finder
    and kept on the instance for the following calls.
    """
    finder = self._heap_finder
    if finder is None:
        # first access: build the finder and remember it
        finder = heapwalker.make_heap_finder(self)
        self._heap_finder = finder
    return finder

def get_heap_walker(self, heap):
"""Returns the IHeapWalker for that process memory."""
if not isinstance(heap, interfaces.IMemoryMapping):
raise TypeError("heap should be a IMemoryMapping")
return self.get_heap_finder().get_heap_walker(heap)

def get_ctypes_utils(self):
"""Returns the Utils toolkit."""
return self._utils
Expand All @@ -257,41 +259,6 @@ def get_model(self):
"""Returns the Model cache."""
return self.__model

# FIXME remove/move to subclass
# move to a IContextHandler
def get_context(self, addr):
"""Returns the haystack.reverse.context.ReverserContext of this dump.
"""
assert isinstance(addr, long) or isinstance(addr, int)
mmap = self.get_mapping_for_address(addr)
if not mmap:
raise ValueError
if hasattr(mmap, '_context'):
# print '** _context exists'
return mmap._context
if mmap not in self.get_heaps(): # addr is not a heap addr,
found = False
# or its in a child heap ( win7)
for h in self.get_heaps():
if hasattr(h, '_children'):
if mmap in h._children:
found = True
mmap = h
break
if not found:
raise ValueError
# we found the heap mmap or its parent
from haystack.reverse import context
try:
ctx = context.ReverserContext.cacheLoad(self)
# print '** CACHELOADED'
except IOError as e:
ctx = context.ReverserContext(self, mmap)
# print '** newly loaded '
# cache it
mmap._context = ctx
return ctx

# FIXME incorrect API
def _get_mapping(self, pathname):
mmap = None
Expand All @@ -312,6 +279,13 @@ def reset_mappings(self):
for m in self.get_mappings():
m.reset()

def __optim_get_mapping_for_address(self):
    """Build the lookup cache used to resolve an address to its mapping.

    The cache is a dict keyed by every 0x1000-spaced address from a
    mapping's start up to (excluding) its end, each pointing at that mapping.
    """
    self.__optim_get_mapping_for_address_cache = page_index = dict()
    for mapping in self.get_mappings():
        boundaries = range(mapping.start, mapping.end, 0x1000)
        page_index.update(dict.fromkeys(boundaries, mapping))

def get_mapping_for_address(self, vaddr):
# TODO: optimization. 127s out of 288s = 40%
assert isinstance(vaddr, long) or isinstance(vaddr, int)
Expand All @@ -324,31 +298,6 @@ def get_mapping_for_address(self, vaddr):
return self.__optim_get_mapping_for_address_cache[_boundary_addr]
return False

def __optim_get_mapping_for_address(self):
self.__optim_get_mapping_for_address_cache = dict()
for m in self._mappings:
for i in range(m.start, m.end, 0x1000):
self.__optim_get_mapping_for_address_cache[i] = m
return

def get_heap(self):
"""Returns the first Heap"""
return self.get_heaps()[0]

def get_heaps(self):
"""Find heap type and returns _memory_handler with heaps"""
if not self.__optim_heaps:
# optimize heaps
self.__optim_heaps = self.get_heap_finder().get_heap_mappings()
return self.__optim_heaps

return

def get_stack(self):
# FIXME wont work on windows.
stack = self._get_mapping('[stack]')[0]
return stack

def is_valid_address(self, obj, structType=None): # FIXME is valid pointer
"""
:param obj: the obj to evaluate.
Expand Down Expand Up @@ -435,6 +384,8 @@ def hasRef(self, typ, origAddr):

def getRef(self, typ, origAddr):
    """Returns the reference to the type previously loaded at this address

    Returns None when no (typ, origAddr) entry is present in the book.
    """
    book = self.__book
    key = (typ, origAddr)
    if key not in book.refs:
        return None
    return book.getRef(typ, origAddr)
Expand All @@ -452,6 +403,9 @@ def keepRef(self, obj, typ=None, origAddr=None):
Sometypes, your have to cast a c_void_p, You can keep ref in Ctypes object,
they might be transient (if obj == somepointer.contents)."""
## DEBUG
##return None

# TODO, memory leak for different objects of same size, overlapping
# struct.
if (typ, origAddr) in self.__book.refs:
Expand Down
4 changes: 3 additions & 1 deletion haystack/memory_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ def _dump_all_mappings(self, destdir):
err = 0
# print '\n'.join([str(m) for m in self._memory_handler])
if self._compact_dump:
self.__required = self.mappings.get_heaps()
finder = self.mappings.get_heap_finder()
self.__required = finder.get_heap_mappings()
# FIXME
self.__required.append(self.mappings.get_stack())
for m in self.mappings:
try:
Expand Down
Loading

0 comments on commit 01b61e9

Please sign in to comment.