From ee9de26b8fbcb43f8e4e7608468544d39eb65a10 Mon Sep 17 00:00:00 2001 From: Loic Jaquemet Date: Tue, 27 Oct 2015 18:25:46 -0600 Subject: [PATCH] more reverse --- CHANGES.txt | 3 + haystack/mappings/base.py | 1 - haystack/reverse/config.py | 11 +-- haystack/reverse/context.py | 41 +++++++++- haystack/reverse/heuristics/dsa.py | 10 ++- haystack/reverse/heuristics/pointertypes.py | 9 +++ haystack/reverse/heuristics/reversers.py | 79 ++++++++++++++++--- haystack/reverse/structure.py | 20 +++-- scripts/haystack-reverse | 9 +-- .../reverse/heuristics/test_reversers.py | 8 ++ test/haystack/reverse/test_context.py | 57 +++++++++++-- 11 files changed, 210 insertions(+), 38 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f6332929..c515a813 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -18,6 +18,9 @@ Changes to architecture.txt: 2015-10: - cleaning up the reverse API - Fix sigsegv bug source in reset_mappings() + - Fix the double linked reverser + - Cleaned up the reverse/structure module + - Added a fun constraints heuristic on the fields of reversed types.
2015-09: - Working on Volatility dump reader diff --git a/haystack/mappings/base.py b/haystack/mappings/base.py index 526a4383..78c81207 100644 --- a/haystack/mappings/base.py +++ b/haystack/mappings/base.py @@ -46,7 +46,6 @@ log = logging.getLogger('memorybase') - class AMemoryMapping(interfaces.IMemoryMapping): """ diff --git a/haystack/reverse/config.py b/haystack/reverse/config.py index 25f62c00..bf970018 100644 --- a/haystack/reverse/config.py +++ b/haystack/reverse/config.py @@ -42,23 +42,27 @@ SIGNATURES_FILENAME = 'signatures' WORDS_FOR_REVERSE_TYPES_FILE = 'data/words.100' + def create_cache_folder_name(dumpname): folder = get_cache_folder_name(dumpname) if not os.access(folder, os.F_OK): os.mkdir(folder) return + def remove_cache_folder(dumpname): folder = get_cache_folder_name(dumpname) if os.access(folder, os.F_OK): shutil.rmtree(folder) return + def get_cache_folder_name(dumpname): root = os.path.abspath(dumpname) return os.path.sep.join([root, CACHE_NAME]) -def get_cache_filename(typ, dumpname, address): + +def get_cache_filename(typ, dumpname, address=None): """ Returns a filename for caching a type of data based on the dump filename. @@ -70,12 +74,9 @@ def get_cache_filename(typ, dumpname, address): fname = typ if address is not None: fname = '%x.%s' % (address, typ) - else: - log.warning('usage without uuid') - import pdb - pdb.set_trace() return os.path.sep.join([get_cache_folder_name(dumpname), fname]) + def get_record_cache_folder_name(dumpname): """ Returns a dirname for caching the structures based on the dump filename. 
diff --git a/haystack/reverse/context.py b/haystack/reverse/context.py index 700e187f..8ee9fe5b 100644 --- a/haystack/reverse/context.py +++ b/haystack/reverse/context.py @@ -36,12 +36,43 @@ def __init__(self, memory_handler): # create the cache folder then self.reset_cache_folder() + # def get_functions_pointers(self): + # try: + # return self.get_cache_radare() + # except IOError as e: + # return self.save_cache_radare() + # + # def get_cache_radare(self): + # dumpname = self.memory_handler.get_name() + # fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname) + # functions = None + # try: + # with file(fname, 'r') as fin: + # functions = pickle.load(fin) + # except EOFError as e: + # os.remove(fname) + # log.error('Error in the radare cache file. File cleaned. Please restart.') + # raise RuntimeError('Error in the radare cache file. File cleaned. Please restart.') + # return functions + # + # def save_cache_radare(self): + # from haystack.reverse.heuristics import radare + # func = radare.RadareAnalysis(self.memory_handler) + # func.init_all_functions() + # import code + # code.interact(local=locals()) + # dumpname = self.memory_handler.get_name() + # fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname) + # with file(fname, 'w') as fout: + # pickle.dump(func.functions, fout) + # return func.functions + def reset_cache_folder(self): """Removes the cache folder""" dumpname = self.memory_handler.get_name() # create the cache folder cache_folder = config.get_cache_folder_name(dumpname) - config.remove_cache_folder(self.memory_handler.get_name()) + # config.remove_cache_folder(self.memory_handler.get_name()) if not os.access(cache_folder, os.F_OK): os.mkdir(cache_folder) log.info("[+] Cache created in %s", cache_folder) @@ -64,7 +95,7 @@ def get_context_for_heap(self, heap): if not isinstance(heap, interfaces.IMemoryMapping): raise TypeError('heap should be a IMemoryMapping') if not heap.is_marked_as_heap(): - raise 
TypeError('heap should be a heap') + raise TypeError('heap should be a heap: %s', heap) if heap.get_marked_heap_address() not in self.__contextes: heap_context = self.make_context_for_heap(heap) self._set_context_for_heap(heap, heap_context) @@ -84,7 +115,7 @@ def add_reversed_type(self, typename, t): self.__reversed_types[typename] = t def list_reversed_types(self): - return self.__reversed_types.values() + return self.__reversed_types.keys() # was get_context def make_context_for_heap(self, heap): @@ -102,6 +133,10 @@ def make_context_for_heap(self, heap): ctx = HeapContext(self.memory_handler, heap) return ctx + def get_filename_cache_headers(self): + dumpname = self.memory_handler.get_name() + return config.get_cache_filename(config.CACHE_GENERATED_PY_HEADERS_VALUES, dumpname) + class HeapContext(object): """ diff --git a/haystack/reverse/heuristics/dsa.py b/haystack/reverse/heuristics/dsa.py index bf73773e..ca362623 100644 --- a/haystack/reverse/heuristics/dsa.py +++ b/haystack/reverse/heuristics/dsa.py @@ -164,6 +164,11 @@ def make_fields(self, structure, offset, size): size -= self._word_size offset += self._word_size continue + # 20151026 - if not aligned, ignore it + if value % self._target.get_word_size(): + size -= self._word_size + offset += self._word_size + continue # we have a pointer log.debug('checkPointer offset:%s value:%s' % (offset, hex(value))) field = fieldtypes.PointerField('ptr_%d' % offset, offset, self._word_size) @@ -238,7 +243,10 @@ class FieldReverser(model.AbstractReverser): IntegerFields: if the word value is small ( |x| < 65535 ) PointerFields: if the word if a possible pointer value - If the word content does not match theses heuristics, tag the fiel has unknown. + If the word content does not match these heuristics, tag the field as unknown. + + TODO: UTF16 array corrector, if utf16 field is preceded by smallint, aggregate both in utf16, + even if not aligned.
""" REVERSE_LEVEL = 10 diff --git a/haystack/reverse/heuristics/pointertypes.py b/haystack/reverse/heuristics/pointertypes.py index 93e2f70d..7345da3c 100644 --- a/haystack/reverse/heuristics/pointertypes.py +++ b/haystack/reverse/heuristics/pointertypes.py @@ -4,6 +4,7 @@ from haystack.reverse import context from haystack.reverse.heuristics import model +from haystack.reverse.heuristics import radare log = logging.getLogger("pointertypes") @@ -20,6 +21,11 @@ class PointerFieldReverser(model.AbstractReverser): """ REVERSE_LEVEL = 50 + def __init__(self, _memory_handler): + super(PointerFieldReverser, self).__init__(_memory_handler) + # process_context = self._memory_handler.get_reverse_context() + # self.__functions_pointers = process_context.get_functions_pointers() + def reverse_record(self, _context, _record): """ @returns structure, with enriched info on pointer fields. @@ -55,6 +61,9 @@ def reverse_record(self, _context, _record): field.set_pointee_ctype('void') # TODO: Function pointer ? 
field.name = 'ptr_ext_lib_%d' % field.offset + # if value in self.__functions_pointers: + # size, bbs, name = self.__functions_pointers[value] + # field.name = 'func_ptr_%s_%d' % (name, field.offset) continue tgt = None try: diff --git a/haystack/reverse/heuristics/reversers.py b/haystack/reverse/heuristics/reversers.py index e2f185cb..bc3ef7db 100644 --- a/haystack/reverse/heuristics/reversers.py +++ b/haystack/reverse/heuristics/reversers.py @@ -9,6 +9,7 @@ from haystack.abc import interfaces from haystack.reverse import config +from haystack.reverse import context from haystack.reverse import structure from haystack.reverse import fieldtypes from haystack.reverse import utils @@ -76,7 +77,7 @@ def reverse_context(self, _context): # self._todo = sorted(set(self._allocations) - set(self._done_records)) self._fromcache = len(self._allocations) - len(self._todo) - log.info('[+] Adding new raw structures from getUserAllocations cached contents - %d todo', len(self._todo)) + log.info('[+] Adding new raw structures from user allocations - %d todo', len(self._todo)) super(BasicCachingReverser, self).reverse_context(_context) def reverse_record(self, _context, _record): @@ -388,10 +389,11 @@ def rename_record_type(self, _members, offset): heap = self._memory_handler.get_mapping_for_address(list_item_addr) _context = self._process_context.get_context_for_heap(heap) _item = _context.get_record_for_address(list_item_addr) + ### KEEP THIS if len(_item) != len(_record): log.warning("x2 linked reverser: len(_item) != len(_record)") else: - _item.set_record_type(_record_type) + _item.set_record_type(_record_type, True) # push the LIST_ENTRY type into the context/memory_handler rev_context = self._memory_handler.get_reverse_context() @@ -411,6 +413,15 @@ def debug_lists(self): for _list in res: log.debug("%s items:\t[%s]", len(_list), ','.join([hex(addr) for addr in _list])) + def rename_all_lists(self): + # rename all lists + for size, offset_lists in self.lists.items(): + for 
offset, multiple_lists in offset_lists.items(): + for members_list in multiple_lists: + nb = len(members_list) + rt = self.rename_record_type(members_list, offset) + log.debug('%d members for : %s', nb, rt.to_string()) + class PointerGraphReverser(model.AbstractReverser): """ @@ -474,7 +485,11 @@ def reverse_record(self, heap_context, _record): self._master_graph.add_edge(hex(_record.address), hex(pointee_addr)) # but we only feed the heaps graph if the target is known heap = self._memory_handler.get_mapping_for_address(pointee_addr) - heap_context = self._memory_handler.get_reverse_context().get_context_for_heap(heap) + try: + heap_context = context.get_context_for_address(self._memory_handler, pointee_addr) + except ValueError as e: + continue + #heap_context = self._memory_handler.get_reverse_context().get_context_for_heap(heap) if heap_context is None: continue try: @@ -490,6 +505,10 @@ def reverse_record(self, heap_context, _record): class ArrayFieldsReverser(model.AbstractReverser): """ Aggregate fields of similar type into arrays in the record. + + Check d4008 in zeus. nice array + d2008 is a major player + 90688 too """ REVERSE_LEVEL = 200 @@ -660,6 +679,38 @@ def refreshOne(context, ptr_value): return mystruct +def save_process_headers(memory_handler): + """ + Save the python class code definition to file. 
+ + :param memory_handler: + :return: + """ + process_context = memory_handler.get_reverse_context() + log.info('[+] saving headers for process') + fout = open(process_context.get_filename_cache_headers(), 'w') + towrite = [] + # + for r_type in process_context.list_reversed_types(): + members = process_context.get_reversed_type(r_type) + from haystack.reverse.heuristics import constraints + rev = constraints.ConstraintsReverser(memory_handler) + txt = rev.verify(r_type, members) + towrite.extend(txt) + towrite.append("# %d members" % len(members)) + towrite.append(r_type.to_string()) + if len(towrite) >= 10000: + try: + fout.write('\n'.join(towrite)) + except UnicodeDecodeError as e: + print 'ERROR on ', r_type + towrite = [] + fout.flush() + fout.write('\n'.join(towrite)) + fout.close() + return + + def save_headers(ctx, addrs=None): """ Save the python class code definition to file. @@ -700,18 +751,20 @@ def reverse_heap(memory_handler, heap_addr): :return: """ from haystack.reverse import context - log.debug('[+] Loading the memory dump for HEAP 0x%x', heap_addr) + log.info('[+] Loading the memory dump for HEAP 0x%x', heap_addr) ctx = context.get_context_for_address(memory_handler, heap_addr) try: # decode bytes contents to find basic types. - log.debug('Reversing Fields') + log.info('Reversing Fields') fr = dsa.FieldReverser(memory_handler) fr.reverse_context(ctx) # try to find some logical constructs. - log.debug('Reversing DoubleLinkedListReverser') + log.info('Reversing DoubleLinkedListReverser') + # why is this a reverse_context ? doublelink = DoubleLinkedListReverser(memory_handler) doublelink.reverse_context(ctx) + doublelink.rename_all_lists() # save to file save_headers(ctx) @@ -735,9 +788,9 @@ def reverse_instances(memory_handler): :return: """ assert isinstance(memory_handler, interfaces.IMemoryHandler) - if True: + if False: # decode bytes contents to find basic types.
- log.debug('Reversing Fields') + log.info('Reversing Fields') fr = dsa.FieldReverser(memory_handler) fr.reverse() # try to find some logical constructs. @@ -754,10 +807,12 @@ def reverse_instances(memory_handler): # then and only then can we look at the PointerFields # identify pointer relation between structures - log.debug('Reversing PointerFields') + log.info('Reversing PointerFields') pfr = pointertypes.PointerFieldReverser(memory_handler) pfr.reverse() + # TODO save process type record + # save that for heap in heaps: ctx = memory_handler.get_reverse_context().get_context_for_heap(heap) @@ -765,10 +820,14 @@ def reverse_instances(memory_handler): # save to file save_headers(ctx) + save_process_headers(memory_handler) + # and then # graph pointer relations between structures - log.debug('Reversing PointerGraph') + log.info('Reversing PointerGraph') ptrgraph = PointerGraphReverser(memory_handler) ptrgraph.reverse() + # todo save graph method + return diff --git a/haystack/reverse/structure.py b/haystack/reverse/structure.py index 237357c6..d9401ba7 100644 --- a/haystack/reverse/structure.py +++ b/haystack/reverse/structure.py @@ -180,8 +180,7 @@ def __init__(self, memory_handler, _address, size, prefix=None): self.__address = _address self._size = size self._reverse_level = 0 - from haystack.reverse import structure - self.__record_type = structure.RecordType('struct_%x' % self.__address, self._size, []) + self.__record_type = RecordType('struct_%x' % self.__address, self._size, []) self.reset() # set fields self.set_name(prefix) return @@ -235,9 +234,10 @@ def reset(self): self._dirty = True self._ctype = None self._bytes = None + self.__final = False return - def set_record_type(self, record_type): + def set_record_type(self, record_type, final_type=False): """ Assign a reversed record type to this instance. That will change the fields types and render this record immutable. 
@@ -246,6 +246,7 @@ def set_record_type(self, record_type): :return: """ self.__record_type = record_type + self.__final = final_type def get_fields(self): """ @@ -363,11 +364,20 @@ def to_string(self): field_string_lines.append('\t'+field.to_string(field_value)) fieldsString = '[ \n%s ]' % (''.join(field_string_lines)) info = 'rlevel:%d SIG:%s size:%d' % (self.get_reverse_level(), self.get_signature_text(), len(self)) + final_ctypes = 'ctypes.Structure' + # no renaming in instances.. + # if self.__final: + # final_ctypes = self.__record_type.name + # ctypes_def = ''' + #%s = %s # %s + # + #''' % (self.get_name(), final_ctypes, info) + # else: ctypes_def = ''' -class %s(ctypes.Structure): # %s +class %s(%s): # %s _fields_ = %s -''' % (self.get_name(), info, fieldsString) +''' % (self.get_name(), final_ctypes, info, fieldsString) return ctypes_def def __contains__(self, other): diff --git a/scripts/haystack-reverse b/scripts/haystack-reverse index aceab406..d1977088 100644 --- a/scripts/haystack-reverse +++ b/scripts/haystack-reverse @@ -19,6 +19,8 @@ import os from haystack import argparse_utils from haystack.reverse import config from haystack.reverse import context +from haystack.reverse.heuristics import reversers +from haystack.reverse.heuristics import signature from haystack import dump_loader log = logging.getLogger('haystack-reverse') @@ -26,9 +28,9 @@ log = logging.getLogger('haystack-reverse') def reverse_instances(opt): - import reversers # go through the 4 step process. Double linked list, basic type, Pointers and graph. 
- ctx = reversers.reverse_instances(opt.dumpname) + memory_handler = dump_loader.load(opt.dumpname) + ctx = reversers.reverse_instances(memory_handler) return def writeReversedTypes(opt): @@ -38,7 +40,6 @@ def writeReversedTypes(opt): :param opt: :return: """ - import signature ctx, sizeCache = signature.makeSizeCaches(opt.dumpname) ctx = signature.makeReversedTypes(ctx, sizeCache) outfile = file(config.get_cache_filename(config.REVERSED_TYPES_FILENAME, ctx.dumpname),'w') @@ -61,7 +62,6 @@ def groupStructures(opt): :param opt: :return: """ - import signature ctx, sizeCache = signature.makeSizeCaches(opt.dumpname) for chains in signature.buildStructureGroup(ctx, sizeCache, opt.size ): signature.printStructureGroups(ctx, chains, opt.address ) @@ -73,7 +73,6 @@ def saveSignatures(opt): :param opt: :return: """ - import signature ctx, sig = signature.makeSignatures(opt.dumpname) outfile = config.get_cache_filename(config.SIGNATURES_FILENAME, ctx.dumpname) file(outfile,'w').write(sig) diff --git a/test/haystack/reverse/heuristics/test_reversers.py b/test/haystack/reverse/heuristics/test_reversers.py index 1f55839f..69579e18 100644 --- a/test/haystack/reverse/heuristics/test_reversers.py +++ b/test/haystack/reverse/heuristics/test_reversers.py @@ -141,6 +141,7 @@ def test_double_iter(self): # reverse the types for the list of items 40, at offset 8 offset = 8 self.dllr.rename_record_type(members_list, offset) + # print mid.to_string() # now the second field should be "entry" LIST ENTRY type with 2 subfields. 
one_ptr = start.get_fields()[1] @@ -738,6 +739,13 @@ def test_doublelink(self): print _record.to_string() pass + def test_otherlink(self): + # 0xa6f40, 0xa6f70 + _record = self._context.get_record_for_address(0xccd00) + print _record.to_string() + import code + code.interact(local=locals()) + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) diff --git a/test/haystack/reverse/test_context.py b/test/haystack/reverse/test_context.py index 4f89a387..2c5dc83d 100644 --- a/test/haystack/reverse/test_context.py +++ b/test/haystack/reverse/test_context.py @@ -8,6 +8,10 @@ from haystack import dump_loader from haystack.reverse import context +from haystack.reverse import structure +from haystack.reverse import fieldtypes +from haystack.reverse import config + from test.haystack import SrcTests @@ -38,7 +42,6 @@ def test_get_context(self): self.memory_handler.get_mapping_for_address(0xb84e02d3)) -@unittest.skip('debug sigseg') class TestMappingsWindows(SrcTests): @classmethod @@ -61,7 +64,6 @@ def test_get_context(self): :return: """ - self.putty = context.get_context('test/dumps/putty/putty.1.dump') memory_handler = self.putty.memory_handler # print ''.join(['%s\n'%(m) for m in _memory_handler]) with self.assertRaises(ValueError): @@ -79,12 +81,51 @@ def test_get_context(self): self.putty = None - def test_non_allocated_pointers_are_useless(self): - self.putty = context.get_context('test/dumps/putty/putty.1.dump') - memory_handler = self.putty.memory_handler - allocated_pointers = self.putty._structures_addresses - pointers_values = self.putty._pointers_values - pointers_offsets = self.putty._pointers_offsets +class TestProcessContext(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.dumpname = 'test/src/test-ctypes6.32.dump' + config.remove_cache_folder(cls.dumpname) + cls.memory_handler = dump_loader.load(cls.dumpname) + cls.my_target = cls.memory_handler.get_target_platform() + cls.my_ctypes = cls.my_target.get_target_ctypes() + 
cls.my_utils = cls.my_target.get_target_ctypes_utils() + + @classmethod + def tearDownClass(cls): + cls.memory_handler.reset_mappings() + cls.memory_handler = None + cls.my_target = None + cls.my_ctypes = None + cls.my_utils = None + config.remove_cache_folder(cls.dumpname) + + def test_save_record_type(self): + process_context = self.memory_handler.get_reverse_context() + + _record = structure.AnonymousRecord(self.memory_handler, 0xdeadbeef, 40) + word_size = self.my_target.get_word_size() + + f1 = fieldtypes.Field('f1', 0*word_size, fieldtypes.ZEROES, word_size, False) + f2 = fieldtypes.Field('f2', 1*word_size, fieldtypes.ZEROES, word_size, False) + fields = [f1, f2] + _record_type = structure.RecordType('struct_test', 2*word_size, fields) + _record.set_record_type(_record_type) + # same fields + self.assertEqual(f1, _record.get_fields()[0]) + self.assertEqual(f1, _record.get_field('f1')) + # get_fields return a new list of fields + x = _record.get_fields() + self.assertEqual(x, _record.get_fields()) + x.pop(0) + self.assertNotEqual(x, _record.get_fields()) + + process_context.add_reversed_type(_record_type, [1,2,3]) + + r_types = process_context.list_reversed_types() + self.assertEqual(r_types[0].name, 'struct_test') + if __name__ == '__main__': logging.basicConfig(level=logging.INFO)