From ae742a4e0d5b3baf2d5a3a0dae4a50ae27aa1067 Mon Sep 17 00:00:00 2001 From: Loic Jaquemet Date: Sun, 13 Sep 2015 09:21:26 -0600 Subject: [PATCH] Some work on #12 - but the reverse script is definitively not ready for master --- CHANGES.txt | 17 +- TODO | 16 +- haystack/reverse/context.py | 12 +- haystack/reverse/reversers.py | 4 +- haystack/reverse/signature.py | 37 ++-- scripts/haystack-reverse | 314 ++++++++++++++++++---------------- 6 files changed, 202 insertions(+), 198 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a89b0703..94bd1e78 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,9 +1,6 @@ Ongoing: - - Volatility plugin - - Volatility memorymap - - merge in master - - - move interfaces into an interfaces module, maybe use abc. + - stabilisation of search + - refactorisation of reverse Changes to architecture.txt: - IMemoryHandler: there is a process memory concept (dump, live, vol, rekall) that need a handler. @@ -19,14 +16,13 @@ Changes to architecture.txt: - the memory l - - model should be an instance. - - mapping should represent the memory dump entity - - ctypes for internal haystack loading should be a Proxy ctypes, saved the mapping instance - 2015-09: - Working on Volatility dump reader - working a WinXP chunk iterator - adding PEB to winXP list of know structures + - Working volatility plugins in repo https://github.com/trolldbois/volatility_plugins + - winheap to work with heap at w.e. offset, not with heap aligned on mapping.start. + - fixed winXP x86 heapwalker 2015-08: - Fixes for travis in setup.py @@ -41,8 +37,7 @@ OK (skipped=7, expected failures=4) - start standardizing test dumps in a test file inventory Ran 240 tests in 119.036s FAILED (failures=2, errors=9, skipped=11, expected failures=3) - - + - model is an instance. 2015-07: diff --git a/TODO b/TODO index a1ff0fd3..a72d6a95 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,4 @@ - +- keep the original address in the Outputters, python output. 
- re-correlate the is_valid methods to the record type, so that advanced code-based validation is allowed on a record, on top of constraits @@ -23,16 +23,11 @@ OK: python /home/other/Compil/python-haystack/scripts/haystack --debug --string - use PEB search to double check that we find all HEAPs in standard scenarios. -- orient winheap to work with heap, not with heap aligned on mapping.start. - -- FIX winXP x86 heapwalker, then work on vol. - use pycallgraph to cProfile a HEAP validation. - make a callback profiler that profiles the graph path validation of a structure in graphical format - using a decorator would be fun -***** heap walker validator is not a validator all by itself. - - add a depth parameter to constraints loading on list fields. - separate listmodel in a constraints like config file ? @@ -43,13 +38,9 @@ OK: python /home/other/Compil/python-haystack/scripts/haystack --debug --string - use ASN.1 for constraints. ?? -- FIX get_heap on windows XP/zeus demo. Extract a process from a vol dump to make a test case. - add a hintoffset search thingy, so that one can search for a structure at a particular offset of a mmap ? maybe to complex for API -- make a listmodel method for arrays of structures -- make the winxp freelists method to work. - -- use heapwalker plugins from rekall/volatility to create a proxy heapwalker ? +- make a listmodel method for arrays of structures, or not. Attribute packed is not correct in ctypeslib @@ -73,10 +64,11 @@ Todo: - documented example - Rekall plugin - Rekall memorymap - - make basicmodel:loadable members work with vtypes ? + - make basicmodel:loadable members work with vtypes (vol/rekall) ? - Check why pdfbparse reports some gaps in structs gap_in_pdb_ofs_3C (HEAP) - pylint ignore W0212 in profiles. 
- add PyQt4 as dependency for optional functions - Add to ipython + - Make reverse work again diff --git a/haystack/reverse/context.py b/haystack/reverse/context.py index 44844c6b..a6740cc5 100644 --- a/haystack/reverse/context.py +++ b/haystack/reverse/context.py @@ -49,14 +49,14 @@ def _init2(self): #ptr_values, ptr_offsets, aligned_ptr, not_aligned_ptr = utils.getHeapPointers(self.dumpname, self._memory_handler) # FIXME: no use I think - ##heap_offsets, heap_values = utils.getHeapPointers(self.dumpname, self._memory_handler) - ##self._pointers_values_heap = heap_values - ##self._pointers_offsets_heap = heap_offsets + heap_offsets, heap_values = utils.getHeapPointers(self.dumpname, self.memory_handler) + self._pointers_values_heap = heap_values + self._pointers_offsets_heap = heap_offsets # test with all mmap in target - ##all_offsets, all_values = utils.getAllPointers(self.dumpname, self._memory_handler) - ##self._pointers_values = all_values - ##self._pointers_offsets = all_offsets + all_offsets, all_values = utils.getAllPointers(self.dumpname, self.memory_handler) + self._pointers_values = all_values + self._pointers_offsets = all_offsets if self.memory_handler.get_target_platform().get_os_name() not in ['winxp', 'win7']: log.info('[+] Reversing function pointers names') diff --git a/haystack/reverse/reversers.py b/haystack/reverse/reversers.py index da05262f..5b2522e9 100644 --- a/haystack/reverse/reversers.py +++ b/haystack/reverse/reversers.py @@ -501,7 +501,7 @@ def iterateList(self, context, head_addr): def findHead(self, ctx, members): sizes = sorted([(ctx.getStructureSizeForAddr(m), m) for m in members]) - if sizes[0] < 3 * self.my_target.config.get_word_size(): + if sizes[0] < 3 * self.my_target.get_word_size(): log.error('a double linked list element must be 3 WORD at least') raise ValueError( 'a double linked list element must be 3 WORD at least') @@ -654,6 +654,8 @@ def reverseInstances(dumpname): if not 
os.access(config.get_record_cache_folder_name(ctx.dumpname), os.F_OK): os.mkdir(config.get_record_cache_folder_name(ctx.dumpname)) + log.info("[+] Cache created in %s", config.get_record_cache_folder_name(ctx.dumpname)) + # we use common allocators to find structures. #log.debug('Reversing malloc') #mallocRev = MallocReverser() diff --git a/haystack/reverse/signature.py b/haystack/reverse/signature.py index 8f0e6241..5d2e3743 100644 --- a/haystack/reverse/signature.py +++ b/haystack/reverse/signature.py @@ -11,8 +11,9 @@ import re import Levenshtein # seqmatcher ? import networkx +import numpy -from haystack.reverse.config import Config +from haystack.reverse import config from haystack.utils import xrange from haystack.reverse import pointerfinder @@ -43,8 +44,10 @@ def _init_signatures(self): # need to force resolve of structures self._signatures = [] for addr in map(long, self._structures_addresses): + # decode the fields self._context.getStructureForAddr( addr).decodeFields() # can be long + # get the signature for the record self._signatures.append( (addr, self._context.getStructureForAddr(addr).getSignature(True))) return @@ -69,8 +72,8 @@ def make(self): return def persist(self): - outdir = Config.getCacheFilename( - Config.CACHE_SIGNATURE_GROUPS_DIR, + outdir = config.get_cache_filename( + config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) @@ -82,14 +85,14 @@ def persist(self): return def isPersisted(self): - outdir = Config.getCacheFilename( - Config.CACHE_SIGNATURE_GROUPS_DIR, + outdir = config.get_cache_filename( + config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname) return os.access(os.path.sep.join([outdir, self._name]), os.F_OK) def load(self): - outdir = Config.getCacheFilename( - Config.CACHE_SIGNATURE_GROUPS_DIR, + outdir = config.get_cache_filename( + config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname) inname = os.path.sep.join([outdir, self._name]) self._similarities = 
utils.int_array_cache(inname) @@ -109,11 +112,11 @@ def __init__(self, ctx): self._sizes = None def _loadCache(self): - outdir = Config.getCacheFilename( - Config.CACHE_SIGNATURE_SIZES_DIR, + outdir = config.get_cache_filename( + config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) fdone = os.path.sep.join( - [outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG]) + [outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]) if not os.access(fdone, os.R_OK): return False for myfile in os.listdir(outdir): @@ -127,8 +130,8 @@ def _loadCache(self): def cacheSizes(self): """Find the number of different sizes, and creates that much numpyarray""" # if not os.access - outdir = Config.getCacheFilename( - Config.CACHE_SIGNATURE_SIZES_DIR, + outdir = config.get_cache_filename( + config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) @@ -147,7 +150,7 @@ def cacheSizes(self): # saved all sizes dictionaries. # tag it as done file( - os.path.sep.join([outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') + os.path.sep.join([outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') self._sizes = arrays return @@ -409,8 +412,8 @@ def graphStructureGroups(context, chains, originAddr=None): print '#', '-' * 78 networkx.readwrite.gexf.write_gexf( graph, - Config.getCacheFilename( - Config.CACHE_GRAPH, + config.get_cache_filename( + config.CACHE_GRAPH, context.dumpname)) @@ -494,12 +497,12 @@ def makeGroupSignature(context, sizeCache): # FIXME: 100 maybe is a bit short try: import pkgutil - _words = pkgutil.get_data(__name__, Config.WORDS_FOR_REVERSE_TYPES_FILE) + _words = pkgutil.get_data(__name__, config.WORDS_FOR_REVERSE_TYPES_FILE) except ImportError: import pkg_resources _words = pkg_resources.resource_string( __name__, - Config.WORDS_FOR_REVERSE_TYPES_FILE) + config.WORDS_FOR_REVERSE_TYPES_FILE) # global _NAMES = [s.strip() for s in _words.split('\n')[:-1]] diff --git a/scripts/haystack-reverse b/scripts/haystack-reverse index 114e4e43..7bb4c084 
100644 --- a/scripts/haystack-reverse +++ b/scripts/haystack-reverse @@ -19,183 +19,195 @@ import os log = logging.getLogger('haystack-reverse') from haystack import argparse_utils +from haystack.reverse import config -def make_config(opts): - #make out wordsize. - from haystack.reverse import config - - config.make_config_from_memdump( opts.dumpname ) def reverseInstances(opt): - from haystack.reverse import reversers - context = reversers.reverseInstances(opt.dumpname) - return + from haystack.reverse import reversers + ctx = reversers.reverseInstances(opt.dumpname) + return def writeReversedTypes(opt): - '''reverse types from a memorydump, and write structure definition to file ''' - from haystack.reverse.config import Config - from haystack.reverse import signature - context, sizeCache = signature.makeSizeCaches(opt.dumpname) - context = signature.makeReversedTypes(context, sizeCache) - outfile = file(Config.getCacheFilename(Config.REVERSED_TYPES_FILENAME, context.dumpname),'w') - for revStructType in context.listReversedTypes(): - outfile.write(revStructType.toString()) - outfile.close() - log.info('[+] Wrote to %s'%(outfile.name)) - return + """ + reverse types from a memorydump, and write structure definition to file + :param opt: + :return: + """ + from haystack.reverse import signature + ctx, sizeCache = signature.makeSizeCaches(opt.dumpname) + ctx = signature.makeReversedTypes(ctx, sizeCache) + outfile = file(config.get_cache_filename(config.REVERSED_TYPES_FILENAME, ctx.dumpname),'w') + for revStructType in ctx.listReversedTypes(): + outfile.write(revStructType.toString()) + outfile.close() + log.info('[+] Wrote to %s', outfile.name) + return def groupStructures(opt): - ''' show sorted structure instances groups to stdout ''' - from haystack.reverse import signature - context, sizeCache = signature.makeSizeCaches(opt.dumpname) - for chains in signature.buildStructureGroup(context, sizeCache, opt.size ): - signature.printStructureGroups(context, chains, 
opt.address ) - return + """ + show sorted structure instances groups to stdout + :param opt: + :return: + """ + from haystack.reverse import signature + ctx, sizeCache = signature.makeSizeCaches(opt.dumpname) + for chains in signature.buildStructureGroup(ctx, sizeCache, opt.size ): + signature.printStructureGroups(ctx, chains, opt.address ) + return def saveSignatures(opt): - ''' translate a memdump into a signature based file NULL,POINTERS,OTHERS''' - from haystack.reverse.config import Config - from haystack.reverse import signature - context, sig = signature.makeSignatures(opt.dumpname) - outfile = Config.getCacheFilename(Config.SIGNATURES_FILENAME, context.dumpname) - file(outfile,'w').write(sig) - log.info('[+] Signature written to %s'%(outfile)) - return + """ + translate a memdump into a signature based file NULL,POINTERS,OTHERS + :param opt: + :return: + """ + from haystack.reverse import signature + ctx, sig = signature.makeSignatures(opt.dumpname) + outfile = config.get_cache_filename(config.SIGNATURES_FILENAME, ctx.dumpname) + file(outfile,'w').write(sig) + log.info('[+] Signature written to %s'%(outfile)) + return def show(opt): - ''' Show a structure ''' - log.info('[+] Load context') - context = reversers.getContext(opt.dumpname) - log.info('[+] Find Structure at: @%x'%(opt.address)) - try: - st = context.getStructureForOffset(opt.address) - st.decodeFields() - print st.toString() - except ValueError,e: - log.info('[+] Found no structure.') + """ + Show a structure + :param opt: + :return: + """ + from haystack.reverse import context + log.info('[+] Load ctx') + ctx = context.get_context(opt.dumpname) + log.info('[+] Find Structure at: @%x', opt.address) + try: + st = ctx.getStructureForOffset(opt.address) + st.decodeFields() + print st.toString() + except ValueError,e: + log.info('[+] Found no structure.') + return return - return def printParents(opt): - ''' print the parental structures ''' - from haystack.reverse import reversers - log.info('[+] 
Load context') - context = reversers.getContext(opt.dumpname) - log.info('[+] find offsets of struct_addr:%x'%(opt.address)) - i = 0 - try: - child_address = context.getStructureAddrForOffset(opt.address) - for st in context.listStructuresForPointerValue(child_address): - st.decodeFields() - print st.toString() - i+=1 - except ValueError,e: - log.info('[+] Found no structures.') + """ + print the parental structures + :param opt: + :return: + """ + from haystack.reverse import context + log.info('[+] Load ctx') + ctx = context.get_context(opt.dumpname) + log.info('[+] find offsets of struct_addr:%x', opt.address) + i = 0 + try: + child_address = ctx.getStructureAddrForOffset(opt.address) + for st in ctx.listStructuresForPointerValue(child_address): + st.decodeFields() + print st.toString() + i+=1 + except ValueError,e: + log.info('[+] Found no structures.') + return + log.info('[+] Found %d structures.'%( i )) return - log.info('[+] Found %d structures.'%( i )) - return - - def clean(opt): - log.info('[+] Cleaning cache') - context = Config.cleanCache(opt.dumpname) - + log.info('[+] Cleaning cache') + ctx = config.remove_cache_folder(opt.dumpname) def graph(opt): - ''' show sorted structure instances groups to gefx ''' - #log.info('[+] Graphing') - #context, sizeCache = signature.makeSizeCaches(opt.dumpname) - #for chains in signature.buildStructureGroup(context, sizeCache, opt.size ): - # signature.graphStructureGroups(context, chains, opt.address ) - # TODO change to generic fn, and output graph - return - + ''' show sorted structure instances groups to gefx ''' + #log.info('[+] Graphing') + #ctx, sizeCache = signature.makeSizeCaches(opt.dumpname) + #for chains in signature.buildStructureGroup(ctx, sizeCache, opt.size ): + # signature.graphStructureGroups(ctx, chains, opt.address ) + # TODO change to generic fn, and output graph + return def argparser(): - rootparser = argparse.ArgumentParser(prog='haystack-reverse', - description='Several tools to reverse 
engineer structures on the heap.') - - rootparser.add_argument('--debug', action='store_true', help='Debug mode on.') - rootparser.add_argument('dumpname', type=argparse_utils.readable, action='store', help='Source memory dump by haystack.') - - subparsers = rootparser.add_subparsers(help='sub-command help') - - instances = subparsers.add_parser('instances', - help='List all structures instances with virtual address, member types guess and info.') - instances.set_defaults(func=reverseInstances) - - typemap = subparsers.add_parser('typemap', - help='Try to reverse generic types from instances\' similarities.') - typemap.set_defaults(func=writeReversedTypes) - - groupparser = subparsers.add_parser('group', help='Show structure instances groups by size and signature.') - groupparser.add_argument('--size', type=int, action='store', default=None, - help='Limit to a specific structure size') - groupparser.add_argument('--address', type=argparse_utils.int16, action='store', default=None, - help='Limit to structure similar to the structure pointed at
') - groupparser.set_defaults(func=groupStructures) - - parent = subparsers.add_parser('parent', help='Print the parent structures pointing to the structure located at this address.') - parent.add_argument('address', type=argparse_utils.int16, action='store', default=None, - help='Hex address of the child structure.') - parent.set_defaults(func=printParents) - - graphparser = subparsers.add_parser('graph', help='DISABLED - Show sorted structure instances groups by size and signature in a graph.') - graphparser.add_argument('--size', type=int, action='store', default=None, - help='Limit to a specific structure size') - graphparser.add_argument('--address', type=argparse_utils.int16, action='store', default=None, - help='Limit to structure similar to the structure pointed at
') - graphparser.set_defaults(func=graph) - - showparser = subparsers.add_parser('show', help='Show one structure instance.') - showparser.add_argument('address', type=argparse_utils.int16, action='store', default=None, - help='Specify the address of the structure, or of a structure member.') - showparser.set_defaults(func=show) - - # XXX delete ? - makesig = subparsers.add_parser('makesig', help='Create a simple signature file of the heap - NULL, POINTERS, OTHER VALUES.') - makesig.set_defaults(func=saveSignatures) - - cleanp = subparsers.add_parser('clean', help='Clean the memory dump from cached info.') - cleanp.set_defaults(func=clean) - - - return rootparser + rootparser = argparse.ArgumentParser(prog='haystack-reverse', + description='Several tools to reverse engineer structures on the heap.') + + rootparser.add_argument('--debug', action='store_true', help='Debug mode on.') + rootparser.add_argument('dumpname', type=argparse_utils.readable, action='store', help='Source memory dump by haystack.') + + subparsers = rootparser.add_subparsers(help='sub-command help') + + instances = subparsers.add_parser('instances', + help='List all structures instances with virtual address, member types guess and info.') + instances.set_defaults(func=reverseInstances) + + # not refactored yet + #typemap = subparsers.add_parser('typemap', + # help='Try to reverse generic types from instances\' similarities.') + #typemap.set_defaults(func=writeReversedTypes) + + # not refactored yet + #groupparser = subparsers.add_parser('group', help='Show structure instances groups by size and signature.') + #groupparser.add_argument('--size', type=int, action='store', default=None, + # help='Limit to a specific structure size') + #groupparser.add_argument('--address', type=argparse_utils.int16, action='store', default=None, + # help='Limit to structure similar to the structure pointed at
') + #groupparser.set_defaults(func=groupStructures) + + # not refactored yet + #parent = subparsers.add_parser('parent', help='Print the parent structures pointing to the structure located at this address.') + #parent.add_argument('address', type=argparse_utils.int16, action='store', default=None, + # help='Hex address of the child structure.') + #parent.set_defaults(func=printParents) + + # not refactored yet + #graphparser = subparsers.add_parser('graph', help='DISABLED - Show sorted structure instances groups by size and signature in a graph.') + #graphparser.add_argument('--size', type=int, action='store', default=None, + # help='Limit to a specific structure size') + #graphparser.add_argument('--address', type=argparse_utils.int16, action='store', default=None, + # help='Limit to structure similar to the structure pointed at
') + #graphparser.set_defaults(func=graph) + + # not refactored yet + #showparser = subparsers.add_parser('show', help='Show one structure instance.') + #showparser.add_argument('address', type=argparse_utils.int16, action='store', default=None, + # help='Specify the address of the structure, or of a structure member.') + #showparser.set_defaults(func=show) + + # FIXME delete ? + makesig = subparsers.add_parser('makesig', help='Create a simple signature file of the heap - NULL, POINTERS, OTHER VALUES.') + makesig.set_defaults(func=saveSignatures) + + cleanp = subparsers.add_parser('clean', help='Clean the memory dump from cached info.') + cleanp.set_defaults(func=clean) + + return rootparser def main(argv): - parser = argparser() - opts = parser.parse_args(argv) - - level=logging.WARNING - if opts.debug : - level=logging.DEBUG - flog = os.path.normpath('log') - logging.basicConfig(level=level, filename=flog, filemode='w') - logging.getLogger('haystack-reverse').setLevel(logging.DEBUG) - logging.getLogger('signature').setLevel(logging.DEBUG) - logging.getLogger('reversers').setLevel(logging.DEBUG) - print ('[+] **** COMPLETE debug log to %s'%(flog)) - else: - logging.getLogger('haystack-reverse').setLevel(logging.INFO) - logging.getLogger('signature').setLevel(logging.INFO) - logging.getLogger('reversers').setLevel(logging.INFO) - logging.getLogger('context').setLevel(logging.INFO) - sh=logging.StreamHandler(sys.stdout) # 2.6, 2.7 compat - logging.getLogger('signature').addHandler( sh ) - logging.getLogger('reversers').addHandler( sh ) - logging.getLogger('haystack-reverse').addHandler( sh ) - - make_config(opts) - opts.func(opts) - - - + parser = argparser() + opts = parser.parse_args(argv) + + level=logging.WARNING + if opts.debug : + level=logging.DEBUG + flog = os.path.normpath('log') + logging.basicConfig(level=level, filename=flog, filemode='w') + logging.getLogger('haystack-reverse').setLevel(logging.DEBUG) + 
logging.getLogger('signature').setLevel(logging.DEBUG) + logging.getLogger('reversers').setLevel(logging.DEBUG) + print ('[+] **** COMPLETE debug log to %s'%(flog)) + else: + logging.getLogger('haystack-reverse').setLevel(logging.INFO) + logging.getLogger('signature').setLevel(logging.INFO) + logging.getLogger('reversers').setLevel(logging.INFO) + logging.getLogger('ctx').setLevel(logging.INFO) + sh=logging.StreamHandler(sys.stdout) # 2.6, 2.7 compat + logging.getLogger('signature').addHandler( sh ) + logging.getLogger('reversers').addHandler( sh ) + logging.getLogger('haystack-reverse').addHandler( sh ) + + opts.func(opts) + return if __name__ == "__main__": - sys.path.append(os.getcwd()) - main(sys.argv[1:]) + sys.path.append(os.getcwd()) + main(sys.argv[1:])