diff --git a/haystack/reverse/context.py b/haystack/reverse/context.py index c09e727b..f8925ce1 100644 --- a/haystack/reverse/context.py +++ b/haystack/reverse/context.py @@ -16,20 +16,14 @@ from haystack.reverse import matchers from haystack.reverse import enumerators -""" -This is a controller to parse allocated chunk from memory and - guess/reverse the record and its field member types. -""" - log = logging.getLogger('context') class ReverserContext(object): """ - TODO: Change Name to MmapReverserContext - add methods for chained mmap - Add check for context, only on valid heaps ( getHeaps) + The ReverserContext is a stateful instance around a Heap. + The context contains cache helpers around the reversing of records. """ def __init__(self, memory_handler, heap): @@ -39,8 +33,6 @@ def __init__(self, memory_handler, heap): self.dumpname = memory_handler.get_name() self.heap = heap self._heap_start = heap.start - self._word_size = self.memory_handler.get_target_platform().get_word_size() - self.parsed = set() self._function_names = dict() # refresh heap pointers list and allocators chunks self._reversedTypes = dict() @@ -49,6 +41,7 @@ def __init__(self, memory_handler, heap): return def _init2(self): + log.info('[+] ReverserContext on heap 0x%x', self.heap.get_marked_heap_address()) # Check that cache folder exists if not os.access(config.get_cache_folder_name(self.dumpname), os.F_OK): os.mkdir(config.get_cache_folder_name(self.dumpname)) @@ -80,15 +73,15 @@ def _is_record_cache_dirty(self): return self._structures is None or len(self._structures) != len(self._structures_addresses) # TODO implement a LRU cache - def _get_structures(self): + def _list_records(self): if not self._is_record_cache_dirty(): return self._structures # otherwise cache Load - log.info('[+] Loading cached structures list') + log.info('[+] Loading cached records list') self._structures = dict( [(long(vaddr), s) for vaddr, s in structure.cache_load_all_lazy(self)]) - log.info('[+] Loaded %d 
cached structures addresses from disk', len(self._structures)) + log.info('[+] Loaded %d cached records addresses from disk', len(self._structures)) # If we are missing some structures from the cache loading # then recreated them in cache from Allocated memory @@ -96,13 +89,13 @@ def _get_structures(self): if nb_missing != 0: import reversers - log.info('[+] Missing cached structures %d' % nb_missing) + log.info('[+] Missing cached records %d' % nb_missing) if nb_missing < 10: log.warning('TO check missing:%d unique: %d', nb_missing, len(set(self._structures_addresses) - set(self._structures))) # use BasicCachingReverser to get user blocks cache_reverse = reversers.BasicCachingReverser(self) _ = cache_reverse.reverse() - log.info('[+] Built %d/%d structures from allocations', + log.info('[+] Built %d/%d records from allocations', len(self._structures), len(self._structures_addresses)) return self._structures @@ -120,7 +113,7 @@ def get_record_size_for_address(self, addr): def get_record_count(self): if self._is_record_cache_dirty(): # refresh the cache - return len(self._get_structures()) + return len(self._list_records()) return len(self._structures_addresses) def get_record_address_at_address(self, _address): @@ -151,7 +144,7 @@ def get_record_for_address(self, addr): :param addr: :return: """ - return self._get_structures()[addr] + return self._list_records()[addr] def listOffsetsForPointerValue(self, ptr_value): '''Returns the list of offsets where this value has been found''' @@ -169,7 +162,7 @@ def listStructuresAddrForPointerValue(self, ptr_value): def listStructuresForPointerValue(self, ptr_value): '''Returns the list of structures with a member with this pointer value ''' - return [self._get_structures()[addr] + return [self._list_records()[addr] for addr in self.listStructuresAddrForPointerValue(ptr_value)] def list_allocations_addresses(self): @@ -179,10 +172,10 @@ def list_allocations_sizes(self): return map(long, self._structures_sizes) def 
listStructuresAddresses(self): - return map(long, self._get_structures().keys()) + return map(long, self._list_records().keys()) def listStructures(self): - return self._get_structures().values() + return self._list_records().values() def is_known_address(self, address): return address in self._structures_addresses @@ -199,6 +192,12 @@ def listReversedTypes(self): return self._reversedTypes.values() # name of cache files + def get_folder_cache(self): + return config.get_cache_folder_name(self.dumpname) + + def get_folder_cache_structures(self): + return config.get_record_cache_folder_name(self.dumpname) + def get_filename_cache_context(self): return config.get_cache_filename(config.CACHE_CONTEXT, self.dumpname, self._heap_start) @@ -208,9 +207,6 @@ def get_filename_cache_headers(self): def get_filename_cache_graph(self): return config.get_cache_filename(config.CACHE_GRAPH, self.dumpname, self._heap_start) - def get_folder_cache_structures(self): - return config.get_record_cache_folder_name(self.dumpname) - def get_filename_cache_pointers_addresses(self): return config.get_cache_filename(config.CACHE_HEAP_ADDRS, self.dumpname, self._heap_start) @@ -223,9 +219,12 @@ def get_filename_cache_allocations_addresses(self): def get_filename_cache_allocations_sizes(self): return config.get_cache_filename(config.CACHE_MALLOC_CHUNKS_SIZES, self.dumpname, self._heap_start) + def get_filename_cache_signatures(self): + return config.get_cache_filename(config.CACHE_SIGNATURE_GROUPS_DIR, self.dumpname, self._heap_start) + def get_heap_pointers(self): """ - UNUSED + @UNUSED Search Heap pointers values in stack and heap. records values and pointers address in heap. 
@@ -235,8 +234,9 @@ def get_heap_pointers(self): """ feedback = searchers.NoFeedback() matcher = matchers.PointerEnumerator(self.memory_handler) - enumerator = enumerators.WordAlignedEnumerator(self.heap, matcher, feedback, self._word_size) - return utils._get_cache_heap_pointers(self, enumerator) + word_size = self.memory_handler.get_target_platform().get_word_size() + enumerator = enumerators.WordAlignedEnumerator(self.heap, matcher, feedback, word_size) + return utils.get_cache_heap_pointers(self, enumerator) def get_heap_pointers_from_allocated(self, heap_walker): """ @@ -249,8 +249,9 @@ def get_heap_pointers_from_allocated(self, heap_walker): """ feedback = searchers.NoFeedback() matcher = matchers.PointerEnumerator(self.memory_handler) - enumerator = enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, self._word_size) - return utils._get_cache_heap_pointers(self, enumerator) + word_size = self.memory_handler.get_target_platform().get_word_size() + enumerator = enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, word_size) + return utils.get_cache_heap_pointers(self, enumerator) @classmethod def cacheLoad(cls, memory_handler, heap_addr): @@ -277,7 +278,7 @@ def cacheLoad(cls, memory_handler, heap_addr): def save(self): # we only need dumpfilename to reload _memory_handler, addresses to reload - # cached structures + # cached records cache_context_filename = self.get_filename_cache_context() try: with file(cache_context_filename, 'w') as fout: @@ -308,38 +309,23 @@ def reset(self): def __getstate__(self): """The important things to pickle are: dumpname - parsed _heap_start Ignore the rest """ - # FIXME, double check and delete - #d = self.__dict__.copy() - #del d['_memory_handler'] - #del d['heap'] - #del d['_structures'] - #del d['_structures_addresses'] - ##del d['_pointers_values'] - ##del d['_pointers_offsets'] - #del d['_malloc_addresses'] - #del d['_malloc_sizes'] d = dict() d['dumpname'] = 
self.__dict__['dumpname'] - d['parsed'] = self.__dict__['parsed'] d['_heap_start'] = self.__dict__['_heap_start'] - d['_word_size'] = self.__dict__['_word_size'] return d def __setstate__(self, d): self.dumpname = d['dumpname'] - self.parsed = d['parsed'] self._heap_start = d['_heap_start'] - self._word_size = d['_word_size'] self._structures = None self._function_names = dict() return def save_structures(self): - tl = time.time() + t0 = time.time() if self._structures is None: log.debug('No loading has been done, not saving anything') return @@ -350,13 +336,14 @@ def save_structures(self): except KeyboardInterrupt as e: os.remove(s.fname) raise e - if time.time() - tl > 30: # i>0 and i%10000 == 0: - t0 = time.time() - log.info('\t\t - %2.2f secondes to go ', (len(self._structures) - i) * ((tl - t0) / i)) - tl = t0 + if time.time() - t0 > 30: # i>0 and i%10000 == 0: + tl = time.time() + rate = (tl - t0) / (1 + i) + _ttg = (len(self._structures) - i) * rate + log.info('\t\t - %2.2f seconds to go', _ttg) + t0 = tl tf = time.time() - log.info('\t[.] saved in %2.2f secs' % (tf - tl)) - + log.info('\t[.] saved in %2.2f secs' % (tf - t0)) # FIXME - get context should be on memory_handler. @@ -371,13 +358,13 @@ def get_context(fname, heap_addr): ctx = ReverserContext.cacheLoad(memory_handler, heap_addr) except IOError as e: finder = memory_handler.get_heap_finder() + # force generation of heaps. 
heaps = finder.get_heap_mappings() heap = memory_handler.get_mapping_for_address(heap_addr) ctx = ReverserContext(memory_handler, heap) return ctx - def get_context_for_address(memory_handler, address): """ Returns the haystack.reverse.context.ReverserContext of the process diff --git a/haystack/reverse/fieldtypes.py b/haystack/reverse/fieldtypes.py index 41a06c89..2194f025 100644 --- a/haystack/reverse/fieldtypes.py +++ b/haystack/reverse/fieldtypes.py @@ -63,7 +63,7 @@ def makeStructField(cls, parent, offset, fields): import structure _address = parent.address + offset newfieldType = FieldTypeStruct('%lx' % _address, fields) - newfieldType.setStruct(structure.AnonymousRecord(parent.context, _address, len(newfieldType))) + newfieldType.setStruct(structure.AnonymousRecord(parent._memory_handler, _address, len(newfieldType))) newField = Field(parent, offset, newfieldType, len(newfieldType), False) return newField @@ -90,7 +90,7 @@ class FieldTypeStruct(FieldType): """ def __init__(self, name, fields): - super(FieldTypeStruct, self).__init__(0x1, 'struct', name, 'K', isPtr=False) + super(FieldTypeStruct, self).__init__(0x1, 'struct', name, 'K', isPtr=False) self.size = sum([len(f) for f in fields]) self.elements = fields # TODO s2[0].elements[0].typename.elements[0] is no good @@ -106,9 +106,12 @@ def __len__(self): class FieldTypeArray(FieldType): - + """ + An array type + """ def __init__(self, basicTypeName): - FieldType.__init__(self, 0x60, 'array_%s' % basicTypeName, None, 'a', isPtr=False) + super(FieldTypeArray, self).__init__(0x60, 'array_%s' % basicTypeName, None, 'a', isPtr=False) + # setup all the known types that are interesting to us FieldType.UNKNOWN = FieldType(0x0, 'untyped', 'ctypes.c_ubyte', ctypes.c_ubyte, 'u') @@ -188,7 +191,7 @@ def get_ctype(self): return self._ctype # FIXME TODO - def getTypename(self): + def get_typename(self): if self.is_string() or self.is_zeroes(): return '%s * %d' % (self.typename.ctypes, len(self)) elif 
self.is_array(): @@ -287,19 +290,16 @@ def _getValue(self, maxLen): elif self.is_zeroes(): bytes = repr(self.value) # '\\x00'*len(self) elif self.is_array(): - log.warning('ARRAY in Field type, %s' % self.typename) - log.error( - 'error in 0x%x offset 0x%x' % - (self.struct.address, self.offset)) - bytes = ''.join( - ['[', ','.join([el.to_string() for el in self.elements]), ']']) + log.warning('ARRAY in Field type, %s', self.typename) + log.error('error in 0x%x offset 0x%x', self.struct.address, self.offset) + bytes = ''.join(['[', ','.join([el.to_string() for el in self.elements]), ']']) elif self.padding or self.typename == FieldType.UNKNOWN: bytes = self.struct.bytes[self.offset:self.offset + len(self)] else: # bytearray, pointer... return self.value return bytes - def getSignature(self): + def get_signature(self): return (self.typename, self.size) def to_string(self, prefix=''): @@ -329,7 +329,7 @@ def to_string(self, prefix=''): self.comment, self.usercomment, repr(self.getValue(config.commentMaxSize))) fstr = "%s( '%s' , %s ), %s\n" % ( - prefix, self.get_name(), self.getTypename(), comment) + prefix, self.get_name(), self.get_typename(), comment) return fstr def __getstate__(self): @@ -397,6 +397,9 @@ def __init__(self, astruct, elements): self.basicTypename = elements[0].typename self.size = self.basicTypeSize * len(self.elements) + + super(ArrayField, self).__init__(astruct, self.offset, self.typename, self.size, False) + self.padding = False self.value = None self.comment = '' @@ -409,7 +412,7 @@ def is_array(self): def get_ctype(self): return self._ctype - def getTypename(self): + def get_typename(self): return '%s * %d' % (self.basicTypename.ctypes, self.nbElements) def _getValue(self, maxLen): @@ -428,7 +431,7 @@ def to_string(self, prefix): comment = '# %s %s array:%s' % ( self.comment, self.usercomment, self.getValue(config.commentMaxSize)) fstr = "%s( '%s' , %s ), %s\n" % ( - prefix, self.get_name(), self.getTypename(), comment) + prefix, 
self.get_name(), self.get_typename(), comment) return fstr diff --git a/haystack/reverse/heuristics/dsa.py b/haystack/reverse/heuristics/dsa.py index 14a15a26..2448c0f4 100644 --- a/haystack/reverse/heuristics/dsa.py +++ b/haystack/reverse/heuristics/dsa.py @@ -6,7 +6,7 @@ import logging import array - +import collections import os from haystack.reverse import re_string @@ -285,16 +285,17 @@ def analyze_fields(self, structure): fields, gaps = self._analyze(structure) structure.add_fields(fields) structure.add_fields(gaps) # , FieldType.UNKNOWN - structure.set_resolved() return structure - def _analyze(self, structure): - slen = len(structure) + def _analyze(self, _record): + slen = len(_record) offset = 0 # call on analyzers fields = [] nb = -1 - gaps = [Field(structure, 0, FieldType.UNKNOWN, len(structure), False)] + gaps = [Field(_record, 0, FieldType.UNKNOWN, len(_record), False)] + + _record.set_reverse_level(10) # find zeroes # find strings @@ -307,27 +308,27 @@ def _analyze(self, structure): fields.append(field) continue log.debug('Using %s on %d:%d', analyser.__class__.__name__, field.offset, field.offset + len(field)) - new_fields = analyser.make_fields(structure, field.offset, len(field)) + new_fields = analyser.make_fields(_record, field.offset, len(field)) fields.extend(new_fields) for f1 in new_fields: log.debug('new_field %s', f1) # print fields if len(fields) != nb: # no change in fields, keep gaps nb = len(fields) - gaps = self._make_gaps(structure, fields) + gaps = self._make_gaps(_record, fields) if len(gaps) == 0: return fields, gaps return fields, gaps - def _make_gaps(self, structure, fields): + def _make_gaps(self, _record, fields): fields.sort() gaps = [] nextoffset = 0 for i, f in enumerate(fields): if f.offset > nextoffset: # add temp padding field - self._aligned_gaps(structure, f.offset, nextoffset, gaps) + self._aligned_gaps(_record, f.offset, nextoffset, gaps) elif f.offset < nextoffset: - log.debug(structure) + log.debug(_record) 
log.debug(f) log.debug('%s < %s ' % (f.offset, nextoffset)) log.debug(fields[i + 1]) @@ -337,34 +338,34 @@ def _make_gaps(self, structure, fields): # do next field nextoffset = f.offset + len(f) # conclude on QUEUE insertion - lastfield_size = len(structure) - nextoffset + lastfield_size = len(_record) - nextoffset if lastfield_size > 0: if lastfield_size < self._target.get_word_size(): - gap = Field(structure, nextoffset, FieldType.UNKNOWN, lastfield_size, True) + gap = Field(_record, nextoffset, FieldType.UNKNOWN, lastfield_size, True) log.debug('_make_gaps: adding last field at offset %d:%d', gap.offset, gap.offset + len(gap)) gaps.append(gap) else: - self._aligned_gaps(structure, len(structure), nextoffset, gaps) + self._aligned_gaps(_record, len(_record), nextoffset, gaps) return gaps - def _aligned_gaps(self, structure, endoffset, nextoffset, gaps): + def _aligned_gaps(self, _record, endoffset, nextoffset, gaps): """ if nextoffset is aligned add a gap to gaps, or if nextoffset is not aligned add (padding + gap) to gaps """ if nextoffset % self._target.get_word_size() == 0: - gap = Field(structure, nextoffset, FieldType.UNKNOWN, endoffset - nextoffset, False) + gap = Field(_record, nextoffset, FieldType.UNKNOWN, endoffset - nextoffset, False) log.debug('_make_gaps: adding field at offset %d:%d', gap.offset, gap.offset + len(gap)) gaps.append(gap) else: # unaligned field should be splitted s1 = self._target.get_word_size() - nextoffset % self._target.get_word_size() - gap1 = Field(structure, nextoffset, FieldType.UNKNOWN, s1, True) + gap1 = Field(_record, nextoffset, FieldType.UNKNOWN, s1, True) log.debug('_make_gaps: Unaligned field at offset %d:%d', gap1.offset, gap1.offset + len(gap1)) gaps.append(gap1) if nextoffset + s1 < endoffset: - gap2 = Field(structure, nextoffset + s1, FieldType.UNKNOWN, endoffset - nextoffset - s1, False) + gap2 = Field(_record, nextoffset + s1, FieldType.UNKNOWN, endoffset - nextoffset - s1, False) log.debug('_make_gaps: adding 
field at offset %d:%d', gap2.offset, gap2.offset + len(gap2)) gaps.append(gap2) return @@ -380,7 +381,8 @@ class EnrichedPointerFields(StructureAnalyser): """ def analyze_fields(self, structure): - """ @returns structure, with enriched info on pointer fields. + """ + @returns structure, with enriched info on pointer fields. For pointer fields value: (-) if pointer value is in _memory_handler ( well it is... otherwise it would not be a pointer.) + if value is unaligned, mark it as cheesy @@ -404,11 +406,9 @@ def analyze_fields(self, structure): # + ask _memory_handler for the context for that value try: ctx = context.get_context_for_address(self._memory_handler, value) # no error expected. - #log.warning('value: 0x%0.8x ctx.heap: 0x%0.8x'%(value, ctx.heap.start)) - # print '** ST id', id(structure), hex(structure.address) # + ask context for the target structure or code info except ValueError as e: - log.debug('target to non heap mmaps is not implemented') + # value is a pointer, but not to a heap. m = self._memory_handler.get_mapping_for_address(value) field.set_child_desc('ext_lib @%0.8x %s' % (m.start, m.pathname)) field._ptr_to_ext_lib = True @@ -428,12 +428,14 @@ def analyze_fields(self, structure): field.set_name('ptr_void') continue # structure found + log.debug('Looking at child id:0x%x str:%s', tgt.address, tgt.to_string()) # we always point on structure, not field field.set_child_addr(tgt.address) offset = value - tgt.address try: tgt_field = tgt.get_field_at_offset(offset) # @throws IndexError - except IndexError as e: # there is no field right there + except IndexError as e: + # there is no field right there log.debug('there is no field at pointed value %0.8x. 
May need splitting byte field - %s', value, e) field.set_child_desc('Badly reversed field') field.set_child_ctype('void') @@ -452,19 +454,6 @@ def analyze_fields(self, structure): # all return - def get_unresolved_children(self, structure): - """ returns all children that are not fully analyzed yet.""" - pointerFields = structure.get_pointer_fields() - children = [] - for field in pointerFields: - try: - tgt = structure._context.get_record_for_address(field.value) - if not tgt.is_resolved(): # fields have not been decoded yet - children.append(tgt) - except KeyError as e: - pass - return children - class IntegerArrayFields(StructureAnalyser): @@ -476,8 +465,7 @@ def make_fields(self, structure, offset, size): size = len(bytes) if size < 4: return False - ctr = collections.Counter( - [bytes[i:i + self._target.get_word_size()] for i in range(len(bytes))]) + ctr = collections.Counter([bytes[i:i + self._target.get_word_size()] for i in range(len(bytes))]) floor = max(1, int(size * .1)) # 10 % variation in values #commons = [ c for c,nb in ctr.most_common() if nb > 2 ] commons = ctr.most_common() @@ -486,6 +474,5 @@ def make_fields(self, structure, offset, size): # few values. it migth be an array self.size = size self.values = bytes - self.comment = '10%% var in values: %s' % ( - ','.join([repr(v) for v, nb in commons])) + self.comment = '10%% var in values: %s' % (','.join([repr(v) for v, nb in commons])) return True diff --git a/haystack/reverse/matchers.py b/haystack/reverse/matchers.py index a70028a5..d8f3a3f1 100644 --- a/haystack/reverse/matchers.py +++ b/haystack/reverse/matchers.py @@ -4,6 +4,7 @@ # Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com # + class AbstractMatcher(object): """ Search for pointers by checking if the word value is a valid addresses in memspace. 
@@ -24,6 +25,7 @@ def test_match(self, mapping, vaddr): """ raise NotImplementedError + class AbstractMatcherWithValue(object): """ Search for pointers by checking if the word value is a valid addresses in memspace. @@ -44,6 +46,7 @@ def test_match(self, mapping, vaddr): """ raise NotImplementedError + class PointerSearcher(AbstractMatcher): """ Search for pointers by checking if the word value is a valid addresses in memspace. @@ -58,6 +61,7 @@ def test_match(self, mapping, vaddr): return True return False + class NullSearcher(AbstractMatcher): """ Search for Nulls words in memspace. @@ -73,6 +77,7 @@ def test_match(self, mapping, vaddr): return True return False + class PointerEnumerator(AbstractMatcherWithValue): """ Search for pointers by checking if the word value is a valid addresses in memspace. diff --git a/haystack/reverse/reversers.py b/haystack/reverse/reversers.py index 45679f0f..ae51c70f 100644 --- a/haystack/reverse/reversers.py +++ b/haystack/reverse/reversers.py @@ -19,12 +19,13 @@ BasicCachingReverser: use heapwalker to organise heap user allocations chunks into raw records. -RecordReverser: +AbstractRecordReverser: Implement this class when you are delivering a IRecordReverser The reverse method will iterate on all record in a context and call reverse_record FieldReverser: Decode each structure by asserting simple basic types from the byte content. + Text, Pointers, Integers... PointerFieldReverser: Identify pointer fields and their target structure. 
@@ -84,7 +85,7 @@ def reverse(self): loaded += 1 if size < 0: log.error("Negative allocation size") - mystruct = structure.AnonymousRecord(self._context, ptr_value, size) + mystruct = structure.AnonymousRecord(self._context.memory_handler, ptr_value, size) self._context._structures[ptr_value] = mystruct # cache to disk mystruct.saveme(self._context) @@ -101,11 +102,45 @@ def reverse(self): return -class AbstractReverser(interfaces.IContextReverser): +class AbstractReverser(interfaces.IReverser, interfaces.IContextReverser): + + def __init__(self, _memory_handler): + self._memory_handler = _memory_handler + + def reverse(self): + """ + Go over each record and call the reversing process. + Wraps around some time-based function to ease the wait. + Saves the context to cache at the end. + """ + log.info('[+] %s: START', self) + # run the reverser + for _context in self._memory_handler.get_cached_context(): + self._t0 = time.time() + self._tl = self._t0 + self._nb_reversed = 0 + self._nb_from_cache = 0 + self.reverse_context(_context) + # save the context + _context.save() + # closing statements + return + + def reverse_context(self, _context): + """ + Subclass implementation of the reversing process + + Should iterate over records. 
+ """ + raise NotImplementedError + + +class AbstractContextReverser(AbstractReverser, interfaces.IContextReverser): """ Implements helper wraps """ def __init__(self, _context, _reverse_level): + super(AbstractContextReverser, self).__init__(_context.memory_handler) self._context = _context self._reverse_level = _reverse_level # save to file @@ -133,7 +168,7 @@ def reverse(self): # closing statements total = self._nb_from_cache + self._nb_reversed ts = time.time() - self._t0 - log.info('[+] %s: END %d records in %2.0f (d:%d,c:%d)', self, total, ts, self._nb_reversed, self._nb_from_cache) + log.info('[+] %s: END %d records in %2.0fs (new:%d,cache:%d)', self, total, ts, self._nb_reversed, self._nb_from_cache) return def _callback(self): @@ -142,19 +177,19 @@ def _callback(self): tl = time.time() rate = (tl - self._t0) / (1 + self._nb_reversed + self._nb_from_cache) _ttg = (self._context.get_record_count() - (self._nb_from_cache + self._nb_reversed)) * rate - log.info('%2.2f secondes to go (d:%d,c:%d)', _ttg, self._nb_reversed, self._nb_from_cache) + log.info('%2.2f seconds to go (new:%d,cache:%d)', _ttg, self._nb_reversed, self._nb_from_cache) # write to file - self._fout.write('\n'.join(self._towrite)) - self._towrite = [] - + self._write() return def _append_to_write(self, _content): - self._towrite.append(_content) + #self._towrite.append(_content) + pass def _write(self): - self._fout.write('\n'.join(self._towrite)) - self._towrite = [] + #self._fout.write('\n'.join(self._towrite)) + #self._towrite = [] + pass def __str__(self): return '<%s>' % self.__class__.__name__ @@ -168,15 +203,80 @@ def reverse_context(self, _context): raise NotImplementedError -class DoubleLinkedListReverser(AbstractReverser): +class AbstractRecordReverser(AbstractContextReverser, interfaces.IRecordReverser): + """ + Inherits this class when you are delivering a controller that target structure-based elements. 
+ * Implement reverse_record(self, _record) + """ + def __init__(self, _context, _reverse_level): + super(AbstractRecordReverser, self).__init__(_context, _reverse_level) + + def reverse_context(self, _context): + """ + Go over each record and call the reversing process. + Wraps around some time-based function to ease the wait. + Saves the context to cache at the end. + """ + for _record in _context.listStructures(): + if _record.get_reverse_level() >= self.get_reverse_level(): + # ignore this record. its already reversed. + self._nb_from_cache += 1 + else: + self._nb_reversed += 1 + # call the heuristic + self.reverse_record(_record) + # output headers + self._append_to_write(_record.to_string()) + self._callback() + ## + self._context.save() + return + + def reverse_record(self, _record): + """ + Subclass implementation of the reversing process + + Should set _reverse_level of _record. + """ + raise NotImplementedError + + +class FieldReverser(AbstractRecordReverser): + """ + Decode each structure by asserting simple basic types from the byte content. + + It tries the followings heuristics: + ZeroFields + PrintableAsciiFields + UTF16Fields + IntegerFields + PointerFields + + """ + def __init__(self, _context): + super(FieldReverser, self).__init__(_context, _reverse_level=10) + self._dsa = dsa.DSASimple(self._context.memory_handler) + + def reverse_record(self, _record): + # writing to file + # for ptr_value,anon in context.structures.items(): + self._dsa.analyze_fields(_record) + _record.set_reverse_level(self._reverse_level) + return + + +class DoubleLinkedListReverser(AbstractRecordReverser): """ Identify double Linked list. ( list, vector, ... 
) """ def __init__(self, _context): - super(DoubleLinkedListReverser, self).__init__(_context, _reverse_level=20) + super(DoubleLinkedListReverser, self).__init__(_context, _reverse_level=30) self._target = _context.memory_handler.get_target_platform() + self.found = 0 + self.members = set() + self.lists = [] - def reverse_context(self, _context): + def reverse_record(self, _record): """ for i in range(1, len(context.pointers_offsets)): # find two consecutive ptr if context.pointers_offsets[i-1]+context._target_platform.get_word_size() != context.pointers_offsets[i]: @@ -188,32 +288,33 @@ def reverse_context(self, _context): continue # if not head of structure, not a classic DoubleLinkedList ( TODO, think kernel ctypes + offset) """ - if self._context != _context: - raise ValueError("The _context should be the same as initialization time.") - found = 0 - members = set() - lists = [] - for ptr_value in self._context.listStructuresAddresses(): - if ptr_value in members: - # already checked as part of a list - self._nb_from_cache += 1 - return - if self.is_linked_list_member(ptr_value): - head, _members = self.iterate_list(ptr_value) - if _members is not None: - members.update(_members) - self._nb_reversed += len(_members) - 1 - lists.append((head, _members)) # save list chain - # set names - _context.get_record_for_address(head).set_name('list_head') - [_context.get_record_for_address(m).set_name( - 'list_%x_%d' % (head, i)) for i, m in enumerate(_members)] - # TODO get substructures ( P4P4xx ) signature and - # a) extract substructures - # b) group by signature - found += 1 - self._nb_reversed += 1 - self._callback() + # FIXME, we should check any field offset where a,b is a couple of pointer to the same type + if _record.get_reverse_level() >= self.get_reverse_level(): + # ignore this record. its already reversed. 
+ self._nb_from_cache += 1 + else: + for field in _record.get_fields()[:-1]: + ptr_value = field.offset + _record.address + if ptr_value in self.members: + # already checked as part of a list + self._nb_from_cache += 1 + elif self.is_linked_list_member(ptr_value): + head, _members = self.iterate_list(ptr_value) + if _members is not None: + self.members.update(_members) + self._nb_reversed += len(_members) - 1 + self.lists.append((head, _members)) # save list chain + # set names + # FIXME. change name of fields instead.?? + self._context.get_record_for_address(head).set_name('list_head') + [self._context.get_record_for_address(m).set_name( + 'list_%x_%d' % (head, i)) for i, m in enumerate(_members)] + self.found += 1 + log.debug('0x%x is a linked_list_member in a list of %d members', ptr_value, len(_members)) + else: + log.debug('0x%x is not a linked_list_member', ptr_value) + self._nb_reversed += 1 + self._callback() return def is_linked_list_member(self, ptr_value): @@ -291,8 +392,7 @@ def find_list_head(self, members): sizes = sorted([(self._context.get_record_size_for_address(m), m) for m in members]) if sizes[0] < 3 * self._target.get_word_size(): log.error('a double linked list element must be 3 WORD at least') - raise ValueError( - 'a double linked list element must be 3 WORD at least') + raise ValueError('a double linked list element must be 3 WORD at least') numWordSized = [s for s, addr in sizes].count(3 * self._target.get_word_size()) if numWordSized == 1: head = sizes.pop(0)[1] @@ -305,144 +405,148 @@ def find_list_head(self, members): f2, field0 = self.get_two_pointers(addr + self._target.get_word_size()) if field0 == 0: # this could be HEAD. or a 0 value. 
head = addr - log.debug( - 'We had to guess the HEAD for this linked list %x' % - (addr)) + log.debug('We had to guess the HEAD for this linked list %x', addr) break if head is None: head = sizes[0][1] #raise TypeError('No NULL pointer/HEAD in the double linked list') - log.warning( - 'No NULL pointer/HEAD in the double linked list - head is now %x' % - (head)) + log.warning('No NULL pointer/HEAD in the double linked list - head is now %x', head) return (head, [m for (s, m) in sizes]) -class RecordReverser(AbstractReverser, interfaces.IRecordReverser): - """ - Inherits this class when you are delivering a controller that target structure-based elements. - * Implement reverse_record(self, _record) - """ - def __init__(self, _context, _reverse_level): - super(RecordReverser, self).__init__(_context, _reverse_level) - def reverse_context(self, _context): - """ - Go over each record and call the reversing process. - Wraps around some time-based function to ease the wait. - Saves the context to cache at the end. - """ - for _record in _context.listStructures(): - if _record.get_reverse_level() >= self.get_reverse_level(): - # ignore this record. its already reversed. - self._nb_from_cache += 1 - else: - self._nb_reversed += 1 - # call the heuristic - self.reverse_record(_record) - # output headers - self._append_to_write(_record.to_string()) - self._callback() - ## - self._context.save() - return - - def reverse_record(self, _record): - """ - Subclass implementation of the reversing process - - Should set _reverse_level of _record. - """ - raise NotImplementedError - - -class FieldReverser(RecordReverser): +class PointerFieldReverser(AbstractRecordReverser): """ - Decode each structure by asserting simple basic types from the byte content. + Identify pointer fields and their target structure. 
- It tries the followings heuristics: - ZeroFields - PrintableAsciiFields - UTF16Fields - IntegerFields - PointerFields + You should call this Reverser only when all heaps have been reversed. """ + def __init__(self, _context): - super(FieldReverser, self).__init__(_context, _reverse_level=30) - self._dsa = dsa.DSASimple(self._context.memory_handler) + super(PointerFieldReverser, self).__init__(_context, _reverse_level=50) + self._pfa = dsa.EnrichedPointerFields(self._context.memory_handler) def reverse_record(self, _record): + # TODO: add minimum reversing level check before running # writing to file # for ptr_value,anon in context.structures.items(): - self._dsa.analyze_fields(_record) + self._pfa.analyze_fields(_record) _record.set_reverse_level(self._reverse_level) return -class PointerFieldReverser(RecordReverser): +class TypeReverser(AbstractContextReverser): """ - Identify pointer fields and their target structure. - - You should call this Reverser only when all heaps have been reverse. - TODO: add minimum reversing level check before running """ def __init__(self, _context): - super(PointerFieldReverser, self).__init__(_context, _reverse_level=50) - self._pfa = dsa.EnrichedPointerFields(self._context.memory_handler) + super(TypeReverser, self).__init__(_context, _reverse_level=80) + self._signatures = [] + + def reverse_context(self, _context): + """ + Go over each record and call the reversing process. + Wraps around some time-based function to ease the wait. + Saves the context to cache at the end. + """ + import Levenshtein + for _record in _context.listStructures(): + self._signatures.append((_record.address, _record.get_signature(True))) + self._nb_reversed += 1 + self._callback() + ## + self._similarities = [] + for i, (addr1, el1) in enumerate(self._signatures[:-1]): + for addr2, el2 in self._signatures[i + 1:]: + lev = Levenshtein.ratio(el1, el2) # seqmatcher ? 
+ if lev > 0.75: + #self._similarities.append( ((addr1,el1),(addr2,el2)) ) + self._similarities.append((addr1, addr2)) + # we do not need the signature. + # check for chains + # TODO we need a group maker with an iterator to push group + # proposition to the user + log.debug('\t[-] Signatures done.') + + for _record in _context.listStructures(): + # do the changes. + self.reverse_record(_record) + self._append_to_write(_record.to_string()) + self._callback() + + self._context.save() + self._write() + return + + def persist(self): + outdir = self._context.get_folder_cache() + if not os.path.isdir(outdir): + os.mkdir(outdir) + if not os.access(outdir, os.W_OK): + raise IOError('cant write to %s' % outdir) + # + outname = self._context.get_filename_cache_signatures() + #outname = os.path.sep.join([outdir, self._name]) + ar = utils.int_array_save(outname, self._similarities) + return + + def load(self): + inname = self._context.get_filename_cache_signatures() + self._similarities = utils.int_array_cache(inname) + return def reverse_record(self, _record): + # TODO: add minimum reversing level check before running # writing to file # for ptr_value,anon in context.structures.items(): - self._pfa.analyze_fields(_record) + #self._pfa.analyze_fields(_record) _record.set_reverse_level(self._reverse_level) return -class PointerGraphReverser(RecordReverser): +class PointerGraphReverser(AbstractReverser): """ use the pointer relation between structure to map a graph. """ - def __init__(self, _context): - super(PointerGraphReverser, self).__init__(_context, _reverse_level=60) + def __init__(self, _memory_handler): + super(PointerGraphReverser, self).__init__(_memory_handler) + import networkx + self._master_graph = networkx.DiGraph() + self._graph = networkx.DiGraph() - def reverse_record(self, context): + def reverse_context(self, _context): import networkx - #import code - # code.interact(local=locals()) - graph = networkx.DiGraph() # we only need the addresses... 
- graph.add_nodes_from( - ['%x' % k for k in context.listStructuresAddresses()]) - log.info('[+] Graph - added %d nodes' % (graph.number_of_nodes())) + self._graph.add_nodes_from(['%x' % k for k in _context.listStructuresAddresses()]) + log.info('[+] Graph - added %d nodes' % (self._graph.number_of_nodes())) t0 = time.time() tl = t0 - for i, ptr_value in enumerate(context.listStructuresAddresses()): - struct = context.get_record_for_address(ptr_value) - # targets = set(( '%x'%ptr_value, '%x'%child.target_struct_addr ) - # for child in struct.getPointerFields()) #target_struct_addr - # target_struct_addr - targets = set(('%x' % ptr_value, '%x' % child._child_addr) for child in struct.get_pointer_fields()) - # DEBUG - if len(struct.get_pointer_fields()) > 0: - if len(targets) == 0: - raise ValueError - # DEBUG - graph.add_edges_from(targets) - if time.time() - tl > 30: - tl = time.time() - # if decoded else ((tl-t0)/(fromcache)) - rate = ((tl - t0) / (i)) - log.info('%2.2f secondes to go (g:%d)' % ( - (len(graph) - (i)) * rate, i)) - log.info('[+] Graph - added %d edges' % (graph.number_of_edges())) - networkx.readwrite.gexf.write_gexf(graph, context.get_filename_cache_graph()) - context.parsed.add(str(self)) + for _record in _context.listStructures(): + self.reverse_record(_record) + # output headers + # + log.info('[+] Graph - added %d edges', self._graph.number_of_edges()) + networkx.readwrite.gexf.write_gexf(self._graph, _context.get_filename_cache_graph()) + ## + return + + def reverse_record(self, _record): + ptr_value = _record.address + # targets = set(( '%x'%ptr_value, '%x'%child.target_struct_addr ) + # for child in struct.getPointerFields()) #target_struct_addr + # target_struct_addr + targets = set(('%x' % ptr_value, '%x' % child._child_addr) for child in _record.get_pointer_fields()) + # DEBUG + if len(_record.get_pointer_fields()) > 0: + if len(targets) == 0: + raise ValueError + # DEBUG + self._graph.add_edges_from(targets) return -class 
ArrayFieldsReverser(RecordReverser): +class ArrayFieldsReverser(AbstractRecordReverser): """ Aggregate fields of similar type into arrays in the record. """ @@ -453,8 +557,11 @@ def reverse_record(self, _record): """ Aggregate fields of similar type into arrays in the record. """ - # if not self.resolvedPointers: - # raise ValueError('I should be resolved') + if _record.get_reverse_level() < 30: + raise ValueError('The record reverse level needs to be >30') + + log.debug('0x%x: %s', _record.address, _record.get_signature(text=True)) + _record._dirty = True _record._fields.sort() @@ -508,11 +615,15 @@ def reverse_record(self, _record): raise ValueError("fields patterns len is incorrect %d" % len(fieldTypesAndSizes)) log.debug('done with aggregateFields') - _record._fields = myfields + _record.reset() + _record.add_fields(myfields) + _record.set_reverse_level(self._reverse_level) # print 'final', self.fields + log.debug('0x%x: %s', _record.address, _record.get_signature(text=True)) return -class InlineRecordReverser(RecordReverser): + +class InlineRecordReverser(AbstractRecordReverser): """ Detect record types in a large one . 
""" @@ -604,7 +715,7 @@ def refreshOne(context, ptr_value): offsets, my_pointers_addrs = utils.dequeue( offsets, ptr_value, ptr_value + size) # save the ref/struct type - mystruct = structure.AnonymousRecord(context, ptr_value, size) + mystruct = structure.AnonymousRecord(context.memory_handler, ptr_value, size) context.structures[ptr_value] = mystruct for p_addr in my_pointers_addrs: f = mystruct.add_field( @@ -628,15 +739,11 @@ def save_headers(ctx, addrs=None): """ # structs_addrs is sorted log.info('[+] saving headers') - fout = file( - config.get_cache_filename( - config.CACHE_GENERATED_PY_HEADERS_VALUES, - ctx.dumpname, ctx._heap_start), - 'w') + fout = open(ctx.get_filename_cache_headers(), 'w') towrite = [] if addrs is None: addrs = iter(ctx.listStructuresAddresses()) - + # for vaddr in addrs: #anon = context._get_structures()[vaddr] anon = ctx.get_record_for_address(vaddr) @@ -667,19 +774,20 @@ def reverse_heap(memory_handler, heap_addr): try: if not os.access(config.get_record_cache_folder_name(ctx.dumpname), os.F_OK): os.mkdir(config.get_record_cache_folder_name(ctx.dumpname)) + log.info("[+] Cache created in %s", config.get_cache_folder_name(ctx.dumpname)) + else: + log.info("[+] Cache exists in %s", config.get_cache_folder_name(ctx.dumpname)) - log.info("[+] Cache created in %s", config.get_cache_folder_name(ctx.dumpname)) + # decode bytes contents to find basic types. + log.debug('Reversing Fields') + fr = FieldReverser(ctx) + fr.reverse() # try to find some logical constructs. log.debug('Reversing DoubleLinkedListReverser') doublelink = DoubleLinkedListReverser(ctx) doublelink.reverse() - # decode bytes contents to find basic types. 
- log.debug('Reversing Fields') - fr = FieldReverser(ctx) - fr.reverse() - # save to file save_headers(ctx) @@ -710,19 +818,22 @@ def reverse_instances(dumpname): # reverse all fields in all records from that heap reverse_heap(memory_handler, heap_addr) + # then and only then can we look at the PointerFields + for heap in heaps: ctx = memory_handler.get_cached_context_for_heap(heap) # identify pointer relation between structures log.debug('Reversing PointerFields') pfr = PointerFieldReverser(ctx) pfr.reverse() - - # graph pointer relations between structures - log.debug('Reversing PointerGraph') - ptrgraph = PointerGraphReverser(ctx) - ptrgraph.reverse() + # save that ctx.save_structures() - # save to file save_headers(ctx) + # and then + # graph pointer relations between structures + log.debug('Reversing PointerGraph') + ptrgraph = PointerGraphReverser(memory_handler) + ptrgraph.reverse() + return diff --git a/haystack/reverse/searchers.py b/haystack/reverse/searchers.py index 53118b80..0d01fbbc 100644 --- a/haystack/reverse/searchers.py +++ b/haystack/reverse/searchers.py @@ -20,6 +20,7 @@ log = logging.getLogger('searchers') + class AFeedbackGiver(object): """ Class to give feedback at every step. @@ -35,9 +36,10 @@ def feedback(self, step, val): # log.info('processing vaddr 0x%x'%(val)) raise NotImplementedError + class NoFeedback(AFeedbackGiver): def __init__(self): - self.count = 1 + super(NoFeedback, self).__init__(1) def feedback(self, step, val): """ make a feedback""" @@ -95,6 +97,7 @@ def _check_steps(self, step): def get_search_mapping(self): return self._search_mapping + class WordAlignedSearcher(AbstractSearcher): """ Search for something in memspace. @@ -124,6 +127,7 @@ def search(self): self._values = [t for t in self] return self._values + class AllocatedWordAlignedSearcher(WordAlignedSearcher): """ Search for something in allocated memspace. 
@@ -170,10 +174,11 @@ def __iter__(self): def reverse_lookup(opt): from haystack.reverse import context log.info('[+] Load context') - ctx = context.get_context(opt.dumpname) + # FIXME, it should be the heap_addr addr = opt.struct_addr + ctx = context.get_context(opt.dumpname, addr) - log.info('[+] find offsets of struct_addr:%x' % (addr)) + log.info('[+] find offsets of struct_addr:%x' % addr) i = -1 structs = set() try: @@ -203,9 +208,9 @@ def merge_dump(dumpname): word_size = memory_handler.get_target_platform().get_word_size() feedback = NoFeedback() # get pointers in stack - stack_searcher = WordAlignedSearcher(stack, PointerSearcher(memory_handler), feedback, word_size) + stack_searcher = WordAlignedSearcher(stack, matchers.PointerSearcher(memory_handler), feedback, word_size) - heap_searcher = WordAlignedSearcher(heap, PointerSearcher(memory_handler), feedback, word_size) + heap_searcher = WordAlignedSearcher(heap, matchers.PointerSearcher(memory_handler), feedback, word_size) pointersFromHeap = heap_searcher.search() pointersFromStack = stack_searcher.search() pointersFromHeap = sorted(pointersFromHeap) diff --git a/haystack/reverse/structure.py b/haystack/reverse/structure.py index d4db51d5..6c5658d3 100644 --- a/haystack/reverse/structure.py +++ b/haystack/reverse/structure.py @@ -56,7 +56,7 @@ def remap_load(_context, address, newmappings): if p is None: return None # YES we do want to over-write _memory_handler and bytes - p._memory_handler = _context.memory_handler + p.set_memory_handler(_context.memory_handler) return p @@ -119,7 +119,9 @@ def _load(self): raise e # bad file removed if not isinstance(p, AnonymousRecord): raise EOFError("not a AnonymousRecord in cache. 
%s", p.__class__) - p._memory_handler = self._context.memory_handler + if isinstance(p, CacheWrapper): + raise TypeError("Why is a cache wrapper pickled?") + p.set_memory_handler(self._memory_handler) p._dirty = False CacheWrapper.refs[self.address] = p self.obj = weakref.ref(p) @@ -159,16 +161,16 @@ class AnonymousRecord(object): Comparaison between struct is done is relative addresse space. """ - def __init__(self, context, _address, size, prefix=None): + def __init__(self, memory_handler, _address, size, prefix=None): """ Create a record instance representing an allocated chunk to reverse. - :param context: the context of the allocated chunk + :param memory_handler: the memory_handler of the allocated chunk :param _address: the address of the allocated chunk :param size: the size of the allocated chunk :param prefix: the name prefix to identify the allocated chunk :return: """ - self._memory_handler = context.memory_handler + self._memory_handler = memory_handler self._target = self._memory_handler.get_target_platform() self.__address = _address self._size = size @@ -245,6 +247,11 @@ def saveme(self, _context): log.error("Pickling error, file %s removed", fname) os.remove(fname) raise e + except TypeError as e: + log.error(e) + # FIXME pickling a cachewrapper ???? 
+ #import code + #code.interact(local=locals()) except RuntimeError as e: log.error(e) print self.to_string() @@ -266,10 +273,8 @@ def get_field_at_offset(self, offset): """ if offset < 0 or offset > len(self): raise IndexError("Invalid offset") - log.debug('Looking at child %s %s', self.to_string(), self.is_resolved()) - log.debug('Looking at child structure ID %d' % id(self)) - if not self.is_resolved(): - raise StructureNotResolvedError("Please run BasicCachingReverser at least once on this record") + if self.get_reverse_level() < 10: + raise StructureNotResolvedError("Reverse level %d is too low for record 0x%x", self.get_reverse_level(), self.address) # find the field ret = [f for f in self._fields if f.offset == offset] if len(ret) == 0: @@ -317,16 +322,9 @@ def bytes(self): # TODO re_string.Nocopy return self._bytes - # TODO replace by a numerical "reverse progression" index. - def is_resolved(self): - return self._resolved - - def set_resolved(self): - self._resolved = True + def set_memory_handler(self, memory_handler): + self._memory_handler = memory_handler - def is_resolvedPointers(self): - return self._resolvedPointers - ## def get_reverse_level(self): return self._reverse_level @@ -337,9 +335,7 @@ def to_string(self): # print self.fields self._fields.sort() fieldsString = '[ \n%s ]' % (''.join([field.to_string('\t') for field in self._fields])) - info = 'resolved:%s SIG:%s size:%d' % (self.is_resolved(), self.get_signature(text=True), len(self)) - if len(self.get_pointer_fields()) != 0: - info += ' resolvedPointers:%s' % (self.is_resolvedPointers()) + info = 'rlevel:%d SIG:%s size:%d' % (self.get_reverse_level(), self.get_signature(text=True), len(self)) ctypes_def = ''' class %s(ctypes.Structure): # %s _fields_ = %s @@ -437,13 +433,13 @@ class ReversedType(ctypes.Structure): """ @classmethod - def create(cls, context, name): - ctypes_type = context.getReversedType(name) + def create(cls, _context, name): + ctypes_type = _context.getReversedType(name) 
if ctypes_type is None: # make type an register it ctypes_type = type( name, (cls,), { '_instances': dict()}) # leave _fields_ out - context.addReversedType(name, ctypes_type) + _context.addReversedType(name, ctypes_type) return ctypes_type ''' add the instance to be a instance of this type ''' @@ -461,7 +457,7 @@ def getInstances(cls): return cls._instances @classmethod - def makeFields(cls, context): + def makeFields(cls, _context): # print '****************** makeFields(%s, context)'%(cls.__name__) root = cls.getInstances().values()[0] # try: diff --git a/haystack/reverse/utils.py b/haystack/reverse/utils.py index 1ad3becd..6687b6ea 100644 --- a/haystack/reverse/utils.py +++ b/haystack/reverse/utils.py @@ -96,7 +96,7 @@ def dequeue(addrs, start, end): return addrs, ret -def _get_cache_heap_pointers(ctx, enumerator): +def get_cache_heap_pointers(ctx, enumerator): """ Cache or return Heap pointers values in enumerator . :param dumpfilename: @@ -122,6 +122,7 @@ def _get_cache_heap_pointers(ctx, enumerator): log.info('[+] Loading from cache %d pointers %d unique', len(heap_values), len(set(heap_values))) return heap_addrs, heap_values + def cache_get_user_allocations(ctx, heap_walker): """ cache the user allocations, which are the allocated chunks diff --git a/haystack/structures/win32/winheapwalker.py b/haystack/structures/win32/winheapwalker.py index 894a6dc1..b933ebaa 100644 --- a/haystack/structures/win32/winheapwalker.py +++ b/haystack/structures/win32/winheapwalker.py @@ -44,6 +44,7 @@ def get_free_chunks(self): return self._free_chunks def _set_chunk_lists(self): + # FIXME ? why use a ctypes module here ? 
sublen = ctypes.sizeof(self._heap_module.HEAP_ENTRY) # get all chunks vallocs, va_free = self._get_virtualallocations() diff --git a/haystack/structures/win32/winxpheap.py b/haystack/structures/win32/winxpheap.py index 820eafea..56750bf8 100644 --- a/haystack/structures/win32/winxpheap.py +++ b/haystack/structures/win32/winxpheap.py @@ -461,7 +461,7 @@ def HEAP_get_chunks(self, record): self._memory_handler.keepRef(chunk_header, self.win_heap.struct__HEAP_ENTRY, chunk_addr) # FIXME what is this hack chunk_header._orig_address_ = chunk_addr - log.debug('\t\tEntry: 0x%0.8x\n%s'%( chunk_addr, chunk_header)) + log.debug('\t\tEntry: 0x%0.8x\n%s', chunk_addr, chunk_header) flags = chunk_header._0._1.Flags size = chunk_header._0._0.Size if (flags & 1) == 1: diff --git a/setup.py b/setup.py index 20968f08..cdbdbc93 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ def run(self): setup(name="haystack", - version="0.29", + version="0.30", description="Search C Structures in a process' memory", long_description=open("README.md").read(), url="http://packages.python.org/haystack/", diff --git a/test/haystack/reverse/heuristics/test_dsa.py b/test/haystack/reverse/heuristics/test_dsa.py index 2be23137..92baf177 100644 --- a/test/haystack/reverse/heuristics/test_dsa.py +++ b/test/haystack/reverse/heuristics/test_dsa.py @@ -204,11 +204,12 @@ def test_ascii(self): class TestDSA(unittest.TestCase): @classmethod - def setUpClass(self): + def setUpClass(cls): # context.get_context('test/src/test-ctypes3.dump') - self.context = None - self.putty7124 = context.get_context(putty_7124_win7.dumpname, putty_7124_win7.known_heaps[0][0]) - self.dsa = dsa.DSASimple(self.putty7124.memory_handler) + cls.context = None + cls.putty7124 = context.get_context(putty_7124_win7.dumpname, putty_7124_win7.known_heaps[0][0]) + cls.dsa = dsa.DSASimple(cls.putty7124.memory_handler) + cls.memory_handler = cls.putty7124.memory_handler def setUp(self): pass @@ -222,7 +223,7 @@ def 
test_utf_16_le_null_terminated(self): # struct_682638 in putty.7124.dump vaddr = 0x682638 size = 184 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -240,7 +241,7 @@ def test_utf_16_le_non_null_terminated(self): # struct_691ed8 in putty.7124.dump vaddr = 0x691ed8 size = 256 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -254,7 +255,7 @@ def test_ascii_null_terminated_2(self): # struct_64f328 in putty.7124.dump vaddr = 0x64f328 size = 72 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -268,7 +269,7 @@ def test_utf_16_le_null_terminated_3(self): # in putty.7124.dump vaddr = 0x657488 size = 88 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -282,7 +283,7 @@ def test_big_block(self): # in putty.7124.dump vaddr = 0x63d4c8 # + 1968 size = 4088 # 128 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -300,7 +301,7 @@ def test_uuid(self): # in putty.7124.dump vaddr = 0x63aa68 size = 120 - st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -316,7 +317,7 @@ def test_big_block_2(self): # its garbage anyway vaddr = 0x675b30 size = 8184 
- st = structure.AnonymousRecord(self.putty7124, vaddr, size) + st = structure.AnonymousRecord(self.memory_handler, vaddr, size) self.dsa.analyze_fields(st) # print repr(st.bytes) log.debug(st.to_string()) @@ -344,8 +345,8 @@ def setUpClass(cls): # new test from real case zeus.856 @0xb2e38 cls.real = '\xc81\x0b\x00\xa8*\x0b\x00\x01\x00\x00\x00\x00\x00\x00\x00f \x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\xe0\xa9`\x9dz3\xd0\x11\xbd\x88\x00\x00\xc0\x82\xe6\x9a\xed\x03\x00\x00\x01\x00\x00\x00\xc8\xfc\xbe\x02p\x0c\x00\x00\x08\x00\x00\x00\x1d\x00\x02\x00L\xfd\xbe\x02\xd8\x91\x1b\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x10\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00R\x00S\x00V\x00P\x00 \x00T\x00C\x00P\x00 \x00S\x00e\x00r\x00v\x00i\x00c\x00e\x00 \x00P\x00r\x00o\x00v\x00i\x00d\x00e\x00r\x00\x00\x00f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xab\xe9\x90|\xf2\x94\x80|\x00P\xfd\x7f\x00\x00\x1c\x00\x08\x00\x00\x00\x00\x00\x00\x00t\xfc\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x88\xb0\xd2\x01\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|\x08\x06\x1c\x00m\x05\x91|h^\xd0\x01\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\xc3\x00\x01\x00\x00\x000\x02\x1c\x00\x02\x00\x00\x00\x90\xb0\xd2\x01\x03\x00\x00\x00\x02\x00\x00\x00h^\xd0\x010\x02\x1c\x00\xd8>\xd4\x010\xf0\xfc\x00\xb8\x02\x1c\x00\xe8?\xd4\x01\xd8\x01\x1c\x00\x00\x00\x00\x00\x10\x00\x00\x00\xe8?\xd4\x01\x0c\x00\x00\x00\x05\x00\x00\x00\xf0\x06\x91|\xe0\x01\x1c\x00\x18\x00\x00\x00\xe0>\xd4\x01\x00\x00\x1c\x00\x01\x00\x00\x00\x08\x00\x00\x00\xe0\x01\x1c\x00@\x00\x00\x00\xf0?\xd4\x01\xa8\x04\x1c\x00\x00\x00\x1c\x00Om\x01\x01\x84^\xd0\x01`\x00\x00\x00\xb8\x02\x1c\x00\x00\x00\x00\x00\xd8>\xd4\x01\x88\xfc\xbe\x02F\x0f\x91|\r\x00\x00\x00\xd8>\xd4\x01\x00\x00\x1c\x00\x10<\xd4\x01\x00\x00\x00\x00\\\xfd\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|
\x08\x06\x1c\x00m\x05\x91|`\xab\xf0\x00\x00\x00\x00\x00\xec<\xca\x02\x00\x00\xc3\x00\x0c\x00\x00\x00\x10<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00\x00\x00\x00\x00\x18<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00(\xfd\xbe\x02\xa8\x04\x1c\x00\xd0\x0c\x00\x00@\x00\x00\x00\x03\x00\x00\x00\x18<\xd4\x01\xa8\x04\x1c\x00`\xab\xf0\x00\xc8\x02\x00\x00\xec<\xca\x02\x0c\x00\x0e\x00 #include -// TEST ZONE +// TEST Arrays -struct Node { - unsigned int val1; +struct ArrayTest1 { + unsigned int array1[16]; void * ptr2; + char char3; + unsigned int array4[16]; + void * ptr5; }; int test1(){ - struct Node * node; - node = (struct Node *) malloc(sizeof(struct Node)); - node->val1 = 0xdeadbeef; + struct ArrayTest1 * test1; + node = (struct ArrayTest1 *) malloc(sizeof(struct ArrayTest1)); node->ptr2 = node; + node->ptr5 = node; + node->char3 = 'X' + printf("o: test1 %p\n", node); - + return 0; } @@ -25,11 +30,11 @@ int main(){ // TEST test1(); - + printf("pid %u\n",getpid()); fflush(stdout); sleep(-1); - + return 0; } diff --git a/test/testfiles.py b/test/testfiles.py index b077afd5..67cecd50 100644 --- a/test/testfiles.py +++ b/test/testfiles.py @@ -116,6 +116,8 @@ def __init__(self, dumpname): (0x00e80000, 0x10000), (0x7f6f0000, 0x100000)] +zeus_856_svchost_exe.known_records = [(0x992f0, 14720)] + # putty.1.dump is a win7 32 bits memory dump putty_1_win7 = TestDump('test/dumps/putty/putty.1.dump') putty_1_win7.known_heaps = [(0x00390000, 0x3000),