diff --git a/.travis.yml b/.travis.yml
index 3580d19e..4db1254a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,7 @@ language: python
 python:
  - "2.7"
-# - "3.5"
+ - "3.5"
 
 # we are using pre-dumped tests files
 # and not testing the dumping capacity of haystack.
 
diff --git a/haystack/__init__.py b/haystack/__init__.py
index bbfad986..69e3be50 100644
--- a/haystack/__init__.py
+++ b/haystack/__init__.py
@@ -1,69 +1 @@
-# -*- coding: utf-8 -*-
-
-"""
-:mod:`haystack` -- a package to search known C or ctypes allocators in memory.
-==============================================================================
-.. module:: haystack
-    :platform: Unix, Windows
-    :synopsys: Search, reverse C/ctypes allocators from memory.
-.. moduleauthor:: Loic Jaquemet
-
-Available subpackages
----------------------
-gui
-    An attempt to make a Qt4 GUI.
-reverse
-    Framework to reverse engineer memory allocators
-
-"""
-
-__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com"
-
-__all__ = [
-]
-
-# verify the version
-from pkg_resources import get_distribution, DistributionNotFound
-import os.path
-
-try:
-    _dist = get_distribution('haystack')
-    # Normalize case for Windows systems
-    dist_loc = os.path.normcase(_dist.location)
-    here = os.path.normcase(__file__)
-    if not here.startswith(os.path.join(dist_loc, 'haystack')):
-        # not installed, but there is another version that *is*
-        raise DistributionNotFound
-except DistributionNotFound:
-    __version__ = 'Please install this project with setup.py'
-else:
-    __version__ = _dist.version
-
-# search API
-from haystack.search import api
-search_record = api.search_record
-output_to_string = api.output_to_string
-output_to_python = api.output_to_python
-
-try:
-    import resource
-    # augment our file limit capacity to max
-    maxnofile = resource.getrlimit(resource.RLIMIT_NOFILE)
-    # print 'maxnofile', maxnofile
-    resource.setrlimit(
-        resource.RLIMIT_NOFILE,
-        (maxnofile[1],
-         maxnofile[1]))
-    # maxnofile_after = resource.getrlimit(resource.RLIMIT_NOFILE)
-    # print 'maxnofile_after', maxnofile_after
-    # travis-ci says
-    # maxnofile (64000, 64000)
-    # maxnofile_after (64000, 64000)
-except ImportError as e:
-    pass
-
-
-# bad bad idea...
-MMAP_HACK_ACTIVE = True
-# do not load huge mmap
-MAX_MAPPING_SIZE_FOR_MMAP = 1024 * 1024 * 20
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/haystack/dump_loader.py b/haystack/dump_loader.py
index 97c7eccd..1f657327 100644
--- a/haystack/dump_loader.py
+++ b/haystack/dump_loader.py
@@ -43,6 +43,12 @@
 log = logging.getLogger('dump_loader')
 
 
+# bad bad idea...
+MMAP_HACK_ACTIVE = True
+# do not load huge mmap
+MAX_MAPPING_SIZE_FOR_MMAP = 1024 * 1024 * 20
+
+
 class LazyLoadingException(Exception):
 
     def __init__(self, filename):
@@ -178,7 +184,7 @@ def _load_memory_mappings(self):
             mmap = AMemoryMapping(start, end, permissions, offset, major_device, minor_device, inode, pathname=pathname)
             mmap = LocalMemoryMapping.fromBytebuffer(mmap, mmap_content_file.read())
         # use file mmap when file is too big
-        elif end - start > haystack.MAX_MAPPING_SIZE_FOR_MMAP:
+        elif end - start > MAX_MAPPING_SIZE_FOR_MMAP:
             log.warning('Using a file backed memory mapping. no mmap in memory for this memorymap (%s).' % (pathname) +
                         ' Search will fail. Buffer is needed.')
             mmap = FileBackedMemoryMapping(mmap_content_file.name, start, end, permissions, offset, major_device, minor_device, inode, pathname=pathname)
diff --git a/haystack/mappings/file.py b/haystack/mappings/file.py
index 30ef2387..bc2645b8 100644
--- a/haystack/mappings/file.py
+++ b/haystack/mappings/file.py
@@ -43,16 +43,9 @@
 from haystack.mappings import base
 from haystack.mappings.base import AMemoryMapping
 
-__author__ = "Loic Jaquemet"
-__copyright__ = "Copyright (C) 2012 Loic Jaquemet"
-__email__ = "loic.jaquemet+python@gmail.com"
-__license__ = "GPL"
-__maintainer__ = "Loic Jaquemet"
-__status__ = "Production"
-__credits__ = ["Victor Skinner"]
-
 log = logging.getLogger('file')
 
+MMAP_HACK_ACTIVE = True
 
 class LocalMemoryMapping(AMemoryMapping):
@@ -219,7 +212,7 @@ def _mmap(self):
         if hasattr(self._memdump, 'fileno'):  # normal file.
             # XXX that is the most fucked up, non-portable fuck I ever
             # wrote.
-            if haystack.MMAP_HACK_ACTIVE:
+            if MMAP_HACK_ACTIVE:
                 log.debug('Using MMAP_HACK: %s' % self)
                 # if self.pathname.startswith('/usr/lib'):
                 #     raise Exception
diff --git a/haystack/model.py b/haystack/model.py
index 632e5ffe..eaf01aac 100644
--- a/haystack/model.py
+++ b/haystack/model.py
@@ -1,7 +1,4 @@
 # -*- coding: utf-8 -*-
-#
-# Copyright (C) 2011,2012,2013 Loic Jaquemet loic.jaquemet+python@gmail.com
-#
 
 import ctypes
 import inspect
@@ -23,17 +20,28 @@
 LoadException(Exception)
 """
 
-__author__ = "Loic Jaquemet"
-__copyright__ = "Copyright (C) 2013 Loic Jaquemet"
-__email__ = "loic.jaquemet+python@gmail.com"
-__license__ = "GPL"
-__maintainer__ = "Loic Jaquemet"
-__status__ = "Production"
+
+log = logging.getLogger('model')
+try:
+    import resource
+    # augment our file limit capacity to max
+    maxnofile = resource.getrlimit(resource.RLIMIT_NOFILE)
+    # print 'maxnofile', maxnofile
+    resource.setrlimit(
+        resource.RLIMIT_NOFILE,
+        (maxnofile[1],
+         maxnofile[1]))
+    # maxnofile_after = resource.getrlimit(resource.RLIMIT_NOFILE)
+    # print 'maxnofile_after', maxnofile_after
+    # travis-ci says
+    # maxnofile (64000, 64000)
+    # maxnofile_after (64000, 64000)
+except ImportError as e:
+    pass
 
-
-log = logging.getLogger('model')
 
 class NotValid(Exception):
diff --git a/haystack/reverse/__init__.py b/haystack/reverse/__init__.py
deleted file mode 100644
index be9b984f..00000000
--- a/haystack/reverse/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com
-#
-
-__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com"
-
-__doc__ = '''
-    Tools to reverse engineer the memory allocators present in a MemoryMapping.
-'''
-
-all = [
-]
diff --git a/haystack/reverse/api.py b/haystack/reverse/api.py
deleted file mode 100644
index f81d305d..00000000
--- a/haystack/reverse/api.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import print_function
-import logging
-
-from haystack.abc import interfaces
-from haystack.reverse import config
-from haystack.reverse import context
-from haystack.reverse.heuristics import reversers
-from haystack.reverse.heuristics import dsa
-from haystack.reverse.heuristics import pointertypes
-
-log = logging.getLogger('reverse.api')
-
-
-def save_headers(heap_context, addrs=None):
-    """
-    Save the python class code definition to file.
-
-    :param heap_context:
-    :param addrs:
-    :return:
-    """
-    # structs_addrs is sorted
-    log.info('[+] saving headers')
-    fout = open(heap_context.get_filename_cache_headers(), 'w')
-    towrite = []
-    if addrs is None:
-        addrs = iter(heap_context.listStructuresAddresses())
-    #
-    for vaddr in addrs:
-        # anon = context._get_structures()[vaddr]
-        anon = heap_context.get_record_for_address(vaddr)
-        towrite.append(anon.to_string())
-        if len(towrite) >= 10000:
-            try:
-                fout.write('\n'.join(towrite))
-            except UnicodeDecodeError as e:
-                print('ERROR on ', anon)
-            towrite = []
-            fout.flush()
-    fout.write('\n'.join(towrite))
-    fout.close()
-    return
-
-
-def reverse_heap(memory_handler, heap_addr):
-    """
-    Reverse a specific heap.
-
-    :param memory_handler:
-    :param heap_addr:
-    :return:
-    """
-    from haystack.reverse import context
-    log.info('[+] Loading the memory dump for HEAP 0x%x', heap_addr)
-    heap_context = context.get_context_for_address(memory_handler, heap_addr)
-    try:
-        # decode bytes contents to find basic types.
-        log.info('Reversing Fields')
-        fr = dsa.FieldReverser(memory_handler)
-        fr.reverse_context(heap_context)
-
-        log.info('Fixing Text Fields')
-        tfc = dsa.TextFieldCorrection(memory_handler)
-        tfc.reverse_context(heap_context)
-
-        # try to find some logical constructs.
-        log.info('Reversing DoubleLinkedListReverser')
-        # why is this a reverse_context ?
-        doublelink = reversers.DoubleLinkedListReverser(memory_handler)
-        doublelink.reverse_context(heap_context)
-        doublelink.rename_all_lists()
-
-        # save to file
-        save_headers(heap_context)
-
-        # etc
-    except KeyboardInterrupt as e:
-        # except IOError,e:
-        log.warning(e)
-        log.info('[+] %d structs extracted' % (heap_context.get_record_count()))
-        raise e
-        pass
-    pass
-    return heap_context
-
-
-def reverse_instances(memory_handler):
-    """
-    Reverse all heaps in process from memory_handler
-
-    :param memory_handler:
-    :return:
-    """
-    assert isinstance(memory_handler, interfaces.IMemoryHandler)
-    process_context = memory_handler.get_reverse_context()
-    #for heap in heaps:
-    #    # reverse all fields in all records from that heap
-    #    ## reverse_heap(memory_handler, heap_addr)
-
-    log.info('Reversing Fields')
-    fr = dsa.FieldReverser(memory_handler)
-    fr.reverse()
-
-    log.info('Fixing Text Fields')
-    tfc = dsa.TextFieldCorrection(memory_handler)
-    tfc.reverse()
-
-    # try to find some logical constructs.
-    log.info('Reversing DoubleLinkedListReverser')
-    # why is this a reverse_context ?
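# A note on the stale question above, based only on code in this module: the
# reversers expose two entry points. reverse_heap() earlier in this file
# drives one heap at a time via doublelink.reverse_context(heap_context),
# while this process-wide path calls doublelink.reverse(), which appears to
# iterate every heap context by itself, so no per-context call is made here.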
- doublelink = reversers.DoubleLinkedListReverser(memory_handler) - doublelink.reverse() - doublelink.rename_all_lists() - - # then and only then can we look at the PointerFields - # identify pointer relation between allocators - log.info('Reversing PointerFields') - pfr = pointertypes.PointerFieldReverser(memory_handler) - pfr.reverse() - - # save that - log.info('Saving reversed records instances') - for heap_context in process_context.list_contextes(): - heap_context.save_structures() - # save to file - save_headers(heap_context) - - log.info('Saving reversed records types') - process_context.save_reversed_types() - - # graph pointer relations between allocators - log.info('Reversing PointerGraph') - ptrgraph = reversers.PointerGraphReverser(memory_handler) - ptrgraph.reverse() - - # extract all strings - log.info('Reversing strings') - strout = reversers.StringsReverser(memory_handler) - strout.reverse() - - log.info('Analysis results are in %s', config.get_cache_folder_name(memory_handler.get_name())) - return process_context - - -def get_record_at_address(memory_handler, record_address): - """ - Returns the record athe specified address. - - :param memory_handler: - :param record_address: - :return: - """ - heap_context = context.get_context_for_address(memory_handler, record_address) - return heap_context.get_record_at_address(record_address) - - -def get_record_predecessors(memory_handler, record): - """ - Returns the predecessors of this record. - - :param memory_handler: - :param record: - :return: - """ - process_context = memory_handler.get_reverse_context() - _records = process_context.get_predecessors(record) - return _records diff --git a/haystack/reverse/cli.py b/haystack/reverse/cli.py deleted file mode 100644 index ab211291..00000000 --- a/haystack/reverse/cli.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function - -"""Entry points related to reverse. """ - -import os -import sys - -from haystack import argparse_utils -from haystack import cli -from haystack.reverse import api - -# the description of the function -REVERSE_DESC = 'Reverse the data structure from the process memory' -REVERSE_SHOW_DESC = 'Show the record at a specific address' -REVERSE_PARENT_DESC = 'List the predecessors pointing to the record at this address' -REVERSE_HEX_DESC = 'Show the Hex values for the record at that address.' - - -def reverse_argparser(reverse_parser): - reverse_parser.set_defaults(func=reverse_cmdline) - return reverse_parser - - -def reverse_show_argparser(show_parser): - """ Show function options argument parser """ - show_parser.add_argument('address', type=argparse_utils.int16, help='Record memory address in hex') - show_parser.set_defaults(func=reverse_show_cmdline) - return show_parser - - -def reverse_parents_argparser(parents_parser): - parents_parser.add_argument('address', type=argparse_utils.int16, action='store', default=None, - help='Hex address of the child structure') - parents_parser.set_defaults(func=show_predecessors_cmdline) - return parents_parser - - -def reverse_hex_argparser(hex_parser): - hex_parser.add_argument('address', type=argparse_utils.int16, action='store', default=None, - help='Specify the address of the record, or encompassed by the record') - hex_parser.set_defaults(func=show_hex) - return hex_parser - - -def show_hex(args): - """ Show the Hex values for the record at that address. 
""" - memory_handler = cli.get_memory_handler(args) - process_context = memory_handler.get_reverse_context() - ctx = process_context.get_context_for_address(args.address) - try: - st = ctx.get_record_at_address(args.address) - print(repr(st.bytes)) - except ValueError as e: - print(None) - return - - -def show_predecessors_cmdline(args): - """ - Show the predecessors that point to a record at a particular address. - :param opt: - :return: - """ - memory_handler = cli.get_memory_handler(args) - process_context = memory_handler.get_reverse_context() - ctx = process_context.get_context_for_address(args.address) - try: - child_record = ctx.get_record_at_address(args.address) - except ValueError as e: - print(None) - return - - records = api.get_record_predecessors(memory_handler, child_record) - if len(records) == 0: - print(None) - else: - for p_record in records: - print('#0x%x\n%s\n' % (p_record.address, p_record.to_string())) - return - - -def reverse_show_cmdline(args): - """ Show the record at a specific address. """ - memory_handler = cli.get_memory_handler(args) - process_context = memory_handler.get_reverse_context() - ctx = process_context.get_context_for_address(args.address) - try: - st = ctx.get_record_at_address(args.address) - print(st.to_string()) - except ValueError: - print(None) - return - - -def reverse_cmdline(args): - """ Reverse """ - from haystack.reverse import api as rapi - # get the memory handler adequate for the type requested - memory_handler = cli.get_memory_handler(args) - # do the search - rapi.reverse_instances(memory_handler) - return - - -def main_reverse(): - argv = sys.argv[1:] - desc = REVERSE_DESC + cli.DUMPTYPE_BASE_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_folder_name', type=argparse_utils.readable, help='Use this memory dump folder') - reverse_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_BASE - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def minidump_reverse(): - argv = sys.argv[1:] - desc = REVERSE_DESC + cli.DUMPTYPE_MINIDUMP_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file') - reverse_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_MINIDUMP - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def main_reverse_show(): - argv = sys.argv[1:] - desc = REVERSE_SHOW_DESC + cli.DUMPTYPE_BASE_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_folder_name', type=argparse_utils.readable, help='Use this memory dump folder') - reverse_show_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_BASE - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def minidump_reverse_show(): - argv = sys.argv[1:] - desc = REVERSE_SHOW_DESC + cli.DUMPTYPE_MINIDUMP_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file') - reverse_show_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = 
cli.DUMPTYPE_MINIDUMP - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def main_reverse_parents(): - argv = sys.argv[1:] - desc = REVERSE_PARENT_DESC + cli.DUMPTYPE_BASE_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_folder_name', type=argparse_utils.readable, help='Use this memory dump folder') - reverse_parents_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_BASE - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def minidump_reverse_parents(): - argv = sys.argv[1:] - desc = REVERSE_PARENT_DESC + cli.DUMPTYPE_MINIDUMP_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file') - reverse_parents_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_MINIDUMP - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def main_reverse_hex(): - argv = sys.argv[1:] - desc = REVERSE_HEX_DESC + cli.DUMPTYPE_BASE_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_folder_name', type=argparse_utils.readable, help='Use this memory dump folder') - reverse_hex_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_BASE - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return - - -def minidump_reverse_hex(): - argv = sys.argv[1:] - desc = REVERSE_HEX_DESC + cli.DUMPTYPE_MINIDUMP_DESC - rootparser = cli.base_argparser(program_name=os.path.basename(sys.argv[0]), description=desc) - rootparser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file') - reverse_hex_argparser(rootparser) - opts = rootparser.parse_args(argv) - opts.dumptype = cli.DUMPTYPE_MINIDUMP - # apply verbosity - cli.set_logging_level(opts) - # execute function - opts.func(opts) - return diff --git a/haystack/reverse/config.py b/haystack/reverse/config.py deleted file mode 100644 index ddccabd9..00000000 --- a/haystack/reverse/config.py +++ /dev/null @@ -1,102 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Default configuration for filenames, output directories and such.""" - -import logging -import shutil - -import os - -log = logging.getLogger('config') - - -cacheDir = os.path.normpath('/tmp/') -imgCacheDir = os.path.sep.join([cacheDir, 'img']) -commentMaxSize = 64 -# -DUMPNAME_INDEX_FILENAME = '_memory_handler' -CACHE_NAME = 'cache' -CACHE_STRUCT_DIR = 'structs' -# cache file names -CACHE_GENERATED_PY_HEADERS_VALUES = 'headers_values.py' -CACHE_GENERATED_PY_HEADERS = 'headers.py' -CACHE_HS_POINTERS_VALUES = 'heap+stack.pointers.values' -CACHE_HEAP_ADDRS = 'heap.pointers.offsets' -CACHE_HEAP_VALUES = 'heap.pointers.values' -CACHE_STACK_ADDRS = 'stack.pointers.offsets' -CACHE_STACK_VALUES = 'stack.pointers.values' -CACHE_ALL_PTRS_ADDRS = 'all.pointers.offsets' -CACHE_ALL_PTRS_VALUES = 'all.pointers.values' -CACHE_FUNCTION_NAMES = 'names.pointers.functions' -CACHE_STRUCTURES = 'allocators' -CACHE_MALLOC_CHUNKS_ADDRS = 'mchunks.addrs' -CACHE_MALLOC_CHUNKS_SIZES = 'mchunks.sizes' -CACHE_CONTEXT = 'ctx' -CACHE_GRAPH = 'graph.gexf' -CACHE_GRAPH_HEAP = 'graph.heaps.gexf' -DIFF_PY_HEADERS = 'diff_headers' 
-CACHE_SIGNATURE_SIZES_DIR = 'structs.sizes.d' -CACHE_SIGNATURE_SIZES_DIR_TAG = 'done' -CACHE_SIGNATURE_GROUPS_DIR = 'structs.groups.d' -CACHE_STRINGS = 'strings' -REVERSED_TYPES_FILENAME = 'reversed_types.py' -SIGNATURES_FILENAME = 'signatures' -WORDS_FOR_REVERSE_TYPES_FILE = 'data/words.100' - - -def create_cache_folder(dumpname): - root = os.path.abspath(dumpname) - if not os.path.isdir(root): - os.mkdir(root) - folder = get_cache_folder_name(dumpname) - if not os.path.isdir(folder): - os.mkdir(folder) - if not os.access(folder, os.W_OK): - raise IOError('cannot write to %s' % folder) - return - - -def remove_cache_folder(dumpname): - folder = get_cache_folder_name(dumpname) - if os.path.isdir(folder): - shutil.rmtree(folder) - return - - -def get_cache_folder_name(dumpname): - root = os.path.abspath(dumpname) - return os.path.sep.join([root, CACHE_NAME]) - - -def get_cache_filename(typ, dumpname, address=None): - """ - Returns a filename for caching a type of data based on the dump filename. - - :param typ: one of Config.CACHE_XX types. - :param dumpname: the dumpname to get the cache folder - :param address: a optional unique identifier - :return: - """ - fname = typ - if address is not None: - fname = '%x.%s' % (address, typ) - return os.path.sep.join([get_cache_folder_name(dumpname), fname]) - - -def get_record_cache_folder_name(dumpname): - """ - Returns a dirname for caching the allocators based on the dump filename. - - dumpname: the dump file name. - """ - root = os.path.abspath(dumpname) - return os.path.sep.join([root, CACHE_NAME, CACHE_STRUCT_DIR]) - - -def create_record_cache_folder(dumpname): - folder = get_record_cache_folder_name(dumpname) - if not os.path.isdir(folder): - os.mkdir(folder) - if not os.access(folder, os.W_OK): - raise IOError('cannot write to %s' % folder) - return diff --git a/haystack/reverse/context.py b/haystack/reverse/context.py deleted file mode 100644 index f9b64615..00000000 --- a/haystack/reverse/context.py +++ /dev/null @@ -1,548 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function -from past.builtins import long -from builtins import map - -import logging -import pickle -# import dill as pickle -import time -import numpy -import os - -from haystack.abc import interfaces -from haystack.reverse import utils -from haystack.reverse import config -from haystack.reverse import structure -from haystack.reverse import searchers -from haystack.reverse import matchers -from haystack.reverse import enumerators - - -log = logging.getLogger('context') - - -class ProcessContext(object): - """ - The main context for all heap - """ - def __init__(self, memory_handler): - self.memory_handler = memory_handler - # init heaps - self.__contextes = {} - for walker in self.memory_handler.get_heap_finder().list_heap_walkers(): - self.get_context_for_heap_walker(walker) - # init reversed types - self.__reversed_types = {} - self.__record_graph = None - # see bug #17 self.__model = model.Model(self.memory_handler) - # no need for that - # create the cache folder then - self.create_cache_folders() - - # def get_functions_pointers(self): - # try: - # return self.get_cache_radare() - # except IOError as e: - # return self.save_cache_radare() - # - # def get_cache_radare(self): - # dumpname = self.memory_handler.get_name() - # fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname) - # functions = None - # try: - # with file(fname, 'r') as fin: - # functions = pickle.load(fin) - # except EOFError as e: - # 
os.remove(fname) - # log.error('Error in the radare cache file. File cleaned. Please restart.') - # raise RuntimeError('Error in the radare cache file. File cleaned. Please restart.') - # return functions - # - # def save_cache_radare(self): - # from haystack.reverse.heuristics import radare - # func = radare.RadareAnalysis(self.memory_handler) - # func.init_all_functions() - # import code - # code.interact(local=locals()) - # dumpname = self.memory_handler.get_name() - # fname = config.get_cache_filename(config.CACHE_FUNCTION_NAMES, dumpname) - # with file(fname, 'w') as fout: - # pickle.dump(func.functions, fout) - # return func.functions - - def create_cache_folders(self): - """Removes the cache folder""" - dumpname = self.memory_handler.get_name() - # create the cache folder - config.create_cache_folder(dumpname) - # and the record subfolder - self.create_record_cache_folder() - - def create_record_cache_folder(self): - # and the record subfolder - dumpname = self.memory_handler.get_name() - config.create_record_cache_folder(dumpname) - - def _set_context_for_heap_walker(self, walker, ctx): - """Caches the HeapContext associated to a IHeapWalker""" - self.__contextes[walker.get_heap_address()] = ctx - - def get_context_for_heap_walker(self, walker): - """Returns the HeapContext associated to a Heap represented by a HeapWalker""" - if not isinstance(walker, interfaces.IHeapWalker): - raise TypeError('heap should be a IHeapWalker') - heap_address = walker.get_heap_address() - if heap_address not in self.__contextes: - heap_context = self.make_context_for_heap_walker(walker) - self._set_context_for_heap_walker(walker, heap_context) - return heap_context - return self.__contextes[heap_address] - - def get_context_for_address(self, address): - """ - Returns the haystack.reverse.context.HeapContext of the process - for the HEAP that hosts this address - """ - assert isinstance(address, long) or isinstance(address, int) - heap_mapping = self.memory_handler.get_mapping_for_address(address) - if not heap_mapping: - raise ValueError("Invalid address: 0x%x", address) - finder = self.memory_handler.get_heap_finder() - walker = finder.get_heap_walker(heap_mapping) - if not walker: - raise ValueError("Address is not in heap: 0x%x", address) - heap_context = self.get_context_for_heap_walker(walker) - return heap_context - - def make_context_for_heap_walker(self, walker): - """ - Make the HeapContext for this heap walker. - This will reverse all user allocations from this HEAP into records. - """ - heap_addr = walker.get_heap_address() - try: - ctx = HeapContext.cacheLoad(self.memory_handler, heap_addr) - log.debug("Cache avoided HeapContext initialisation") - except IOError as e: - # heaps are already generated at initialisation of self - mapping = self.memory_handler.get_mapping_for_address(heap_addr) - walker = self.memory_handler.get_heap_finder().get_heap_walker(mapping) - ctx = HeapContext(self.memory_handler, walker) - return ctx - - def list_contextes(self): - """Returns all known HeapContext""" - return self.__contextes.values() - - def get_reversed_type(self, typename): - if typename in self.__reversed_types: - return self.__reversed_types[typename] - return None - - def add_reversed_type(self, typename, t): - self.__reversed_types[typename] = t - - def list_reversed_types(self): - return self.__reversed_types.keys() - - #def _load_reversed_types(self): - # self.__reversed_types = pickle.load() - - def save_reversed_types(self): - """ - Save the python class code definition to file. 
- """ - fout = open(self.get_filename_cache_headers(), 'w') - towrite = ['# This file contains record types deduplicated by instance', - '# Unique values for each field of the record are listed'] - # - nb_total = 0 - nb_unique = 0 - for nb_unique, r_type in enumerate(self.list_reversed_types()): - members = self.get_reversed_type(r_type) - nb_total += len(members) - from haystack.reverse.heuristics import constraints - rev = constraints.ConstraintsReverser(self.memory_handler) - txt = rev.verify(r_type, members) - towrite.extend(txt) - towrite.append("# %d members" % len(members)) - towrite.append(r_type.to_string()) - if len(towrite) >= 10000: - try: - fout.write('\n'.join(towrite)) - except UnicodeDecodeError as e: - print('ERROR on ', r_type) - towrite = [] - fout.flush() - # add some stats - towrite.insert(2, '# Stats: unique_types:%d total_instances:%d' % (nb_unique, nb_total)) - fout.write('\n'.join(towrite)) - fout.close() - return - - def _load_graph_cache(self): - from haystack.reverse.heuristics import reversers - graph_rev = reversers.PointerGraphReverser(self.memory_handler) - self.__record_graph = graph_rev.load_process_graph() - - def get_predecessors(self, record): - """ - Returns the list of record pointing to this record. - - :param: record - :return list - """ - if self.__record_graph is None: - self._load_graph_cache() - predecessors_label = self.__record_graph.predecessors(hex(record.address)) - records = [] - for label in predecessors_label: - # FIXME, eradicate all L for PY3 migration - if label[-1] == 'L': - label = label[:-1] - record_addr = int(label, 16) - heap_context = self.get_context_for_address(record_addr) - records.append(heap_context.get_record_for_address(record_addr)) - return records - - def get_filename_cache_headers(self): - dumpname = self.memory_handler.get_name() - return config.get_cache_filename(config.CACHE_GENERATED_PY_HEADERS_VALUES, dumpname) - - -class HeapContext(object): - """ - The HeapContext is a stateful instance around a Heap. - The context contains cache helpers around the reversing of records. - """ - - def __init__(self, memory_handler, walker): - """ - - :param memory_handler: IMemoryHandler - :param walker: IHeapWalker - :return: - """ - self.memory_handler = memory_handler - # cache it - ### memory_handler.set_context_for_heap(heap, self) - self.dumpname = memory_handler.get_name() - self.walker = walker - self._heap_start = walker.get_heap_address() - self._function_names = dict() - # refresh heap pointers list and allocators chunks - self._reversedTypes = dict() - self._structures = None - self._init2() - return - - def _init2(self): - log.debug('[+] HeapContext on heap 0x%x', self._heap_start) - # Check that cache folder exists - config.create_cache_folder(self.dumpname) - - # re-open the heap walker - heap_mapping = self.memory_handler.get_mapping_for_address(self._heap_start) - finder = self.memory_handler.get_heap_finder() - self.walker = finder.get_heap_walker(heap_mapping) - - # we need a heap walker to parse all allocations - log.debug('[+] Searching pointers in heap') - # get all pointers found in from allocated space. 
- all_offsets, all_values = self.get_heap_pointers_from_allocated(self.walker) - self._pointers_values = all_values - self._pointers_offsets = all_offsets - - log.debug('[+] Gathering allocated heap chunks') - res = utils.cache_get_user_allocations(self, self.walker) - self._structures_addresses, self._structures_sizes = res - - # clean a bit the open fd's - self.walker = None - self.memory_handler.reset_mappings() - # CAUTION: all heap walker, mappings are resetted. - # Segmentation Fault will ensue if we don't restore heap walkers. - heap_mapping = self.memory_handler.get_mapping_for_address(self._heap_start) - finder = self.memory_handler.get_heap_finder() - self.walker = finder.get_heap_walker(heap_mapping) - - - #if self.memory_handler.get_target_platform().get_os_name() not in ['winxp', 'win7']: - # log.info('[+] Reversing function pointers names') - # # TODO in reversers - # # dict(libdl.reverseLocalFonctionPointerNames(self) ) - # self._function_names = dict() - return - - def _is_record_cache_dirty(self): - return self._structures is None or len(self._structures) != len(self._structures_addresses) - - # TODO implement a LRU cache - def _list_records(self): - if not self._is_record_cache_dirty(): - return self._structures - - # otherwise cache Load - log.debug('[+] Loading cached records list') - self._structures = dict([(long(vaddr), s) for vaddr, s in structure.cache_load_all_lazy(self)]) - log.debug('[+] Loaded %d cached records addresses from disk', len(self._structures)) - - # If we are missing some allocators from the cache loading - # then recreated them in cache from Allocated memory - nb_missing = len(self._structures_addresses) - len(self._structures) - if nb_missing != 0: - from haystack.reverse.heuristics import reversers - log.debug('[+] Missing cached records %d' % nb_missing) - if nb_missing < 10: - nb_unique = len(set(self._structures_addresses) - set(self._structures)) - log.warning('TO check missing:%d unique:%d', nb_missing, nb_unique) - # use BasicCachingReverser to get user blocks - cache_reverse = reversers.BasicCachingReverser(self.memory_handler) - _ = cache_reverse.reverse_context(self) - log.info('[+] Built %d/%d records from allocations', - len(self._structures), - len(self._structures_addresses)) - return self._structures - - def get_record_size_for_address(self, addr): - """ - return the allocated record size associated with this address - - :param addr: - :return: - """ - itemindex = numpy.where(self._structures_addresses == numpy.int64(addr))[0][0] - return self._structures_sizes[itemindex] - - def get_record_count(self): - if self._is_record_cache_dirty(): - # refresh the cache - return len(self._list_records()) - return len(self._structures_addresses) - - def get_record_address_at_address(self, _address): - """ - Returns the closest containing record address for this address. - :param _address: - :return: - """ - # if offset not in self.heap: - # raise ValueError('address 0x%0.8x not in heap 0x%0.8x'%(offset, self.heap.start)) - return utils.closestFloorValue(_address, self._structures_addresses)[0] # [1] is the index of [0] - - def get_record_at_address(self, _address): - """ - Returns the closest containing record for this address. 
- :param _address: - :return: - """ - st = self.get_record_for_address(self.get_record_address_at_address(_address)) - if st.address <= _address < (st.address + len(st)): - return st - raise IndexError('No known structure covers that ptr_value') - - def get_record_for_address(self, addr): - """ - return the structure.AnonymousRecord associated with this address - - :param addr: - :return: - """ - return self._list_records()[addr] - - def listOffsetsForPointerValue(self, ptr_value): - '''Returns the list of offsets where this value has been found''' - return [int(self._pointers_offsets[offset]) - for offset in numpy.where(self._pointers_values == ptr_value)[0]] - - def listPointerValueInHeap(self): - '''Returns the list of pointers found in the heap''' - return list(map(long, self._pointers_values)) - - def listStructuresAddrForPointerValue(self, ptr_value): - '''Returns the list of allocators addresses with a member with this pointer value ''' - return sorted(set([int(self.get_record_address_at_address(offset)) - for offset in self.listOffsetsForPointerValue(ptr_value)])) - - def listStructuresForPointerValue(self, ptr_value): - '''Returns the list of allocators with a member with this pointer value ''' - return [self._list_records()[addr] - for addr in self.listStructuresAddrForPointerValue(ptr_value)] - - def list_allocations_addresses(self): - return list(map(long, self._structures_addresses)) - - def list_allocations_sizes(self): - return list(map(long, self._structures_sizes)) - - def listStructuresAddresses(self): - return list(map(long, self._list_records().keys())) - - def listStructures(self): - return list(self._list_records().values()) - - def is_known_address(self, address): - return address in self._structures_addresses - - # name of cache files - def get_folder_cache(self): - return config.get_cache_folder_name(self.dumpname) - - def get_folder_cache_structures(self): - return config.get_record_cache_folder_name(self.dumpname) - - def get_filename_cache_context(self): - return config.get_cache_filename(config.CACHE_CONTEXT, self.dumpname, self._heap_start) - - def get_filename_cache_headers(self): - return config.get_cache_filename(config.CACHE_GENERATED_PY_HEADERS_VALUES, self.dumpname, self._heap_start) - - def get_filename_cache_graph(self): - return config.get_cache_filename(config.CACHE_GRAPH, self.dumpname, self._heap_start) - - def get_filename_cache_pointers_addresses(self): - return config.get_cache_filename(config.CACHE_HEAP_ADDRS, self.dumpname, self._heap_start) - - def get_filename_cache_pointers_values(self): - return config.get_cache_filename(config.CACHE_HEAP_VALUES, self.dumpname, self._heap_start) - - def get_filename_cache_allocations_addresses(self): - return config.get_cache_filename(config.CACHE_MALLOC_CHUNKS_ADDRS, self.dumpname, self._heap_start) - - def get_filename_cache_allocations_sizes(self): - return config.get_cache_filename(config.CACHE_MALLOC_CHUNKS_SIZES, self.dumpname, self._heap_start) - - def get_filename_cache_signatures(self): - return config.get_cache_filename(config.CACHE_SIGNATURE_GROUPS_DIR, self.dumpname, self._heap_start) - - def get_filename_cache_strings(self): - return config.get_cache_filename(config.CACHE_STRINGS, self.dumpname, self._heap_start) - - def get_heap_pointers(self): - """ - @UNUSED - - Search Heap pointers values in stack and heap. - records values and pointers address in heap. 
- :param memory_handler: - :param heap_walker: - :return: - """ - feedback = searchers.NoFeedback() - matcher = matchers.PointerEnumerator(self.memory_handler) - word_size = self.memory_handler.get_target_platform().get_word_size() - enumerator = enumerators.WordAlignedEnumerator(self.heap, matcher, feedback, word_size) - return utils.get_cache_heap_pointers(self, enumerator) - - def get_heap_pointers_from_allocated(self, heap_walker): - """ - Search Heap pointers values in stack and heap. - records values and pointers address in heap. - :param dumpfilename: - :param memory_handler: - :param heap_walker: - :return: - """ - feedback = searchers.NoFeedback() - matcher = matchers.PointerEnumerator(self.memory_handler) - word_size = self.memory_handler.get_target_platform().get_word_size() - enumerator = enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, word_size) - return utils.get_cache_heap_pointers(self, enumerator) - - @classmethod - def cacheLoad(cls, memory_handler, heap_addr): - dumpname = os.path.abspath(memory_handler.get_name()) - config.create_cache_folder(dumpname) - context_cache = config.get_cache_filename(config.CACHE_CONTEXT, dumpname, heap_addr) - try: - with open(context_cache, 'rb') as fin: - ctx = pickle.load(fin) - except (ValueError, EOFError) as e: - os.remove(context_cache) - log.error('Error in the context file. File cleaned. Please restart.') - raise IOError('Error in the context file. File cleaned. Please restart.') - log.debug('\t[-] loaded my context from cache') - ctx.config = config - ctx.memory_handler = memory_handler - ctx.heap = ctx.memory_handler.get_mapping_for_address(ctx._heap_start) - # and initialize - ctx._init2() - return ctx - - def save(self): - # we only need dumpfilename to reload _memory_handler, addresses to reload - # cached records - cache_context_filename = self.get_filename_cache_context() - try: - with open(cache_context_filename, 'wb') as fout: - pickle.dump(self, fout) - except pickle.PicklingError as e: - log.error("Pickling error on %s, file removed", cache_context_filename) - os.remove(cache_context_filename) - raise e - - def reset(self): - try: - cache_context_filename = self.get_filename_cache_context() - os.remove(cache_context_filename) - except OSError as e: - pass - - def __getstate__(self): - d = dict() - d['dumpname'] = self.__dict__['dumpname'] - d['_heap_start'] = self.__dict__['_heap_start'] - return d - - def __setstate__(self, d): - self.dumpname = d['dumpname'] - self._heap_start = d['_heap_start'] - self._structures = None - self._function_names = dict() - return - - def save_structures(self): - t0 = time.time() - if self._structures is None: - log.debug('No loading has been done, not saving anything') - return - # dump all allocators - for i, s in enumerate(self._structures.values()): - try: - s.saveme(self) - except KeyboardInterrupt as e: - os.remove(s.fname) - raise e - if time.time() - t0 > 30: # i>0 and i%10000 == 0: - tl = time.time() - rate = (tl - t0) / (1 + i) - _ttg = (len(self._structures) - i) * rate - log.info('\t\t - %2.2f seconds to go', _ttg) - t0 = tl - tf = time.time() - log.info('\t[.] 
saved in %2.2f secs' % (tf - t0)) - - def stats(self): - return "chunks:%d" % len(self._structures_addresses) - - -def get_context_for_address(memory_handler, address): - """ - Returns the haystack.reverse.context.HeapContext of the process - for the HEAP that hosts this address - """ - return memory_handler.get_reverse_context().get_context_for_address(address) - #assert isinstance(address, long) or isinstance(address, int) - #heap_mapping = memory_handler.get_mapping_for_address(address) - #if not heap_mapping: - # raise ValueError("Invalid address: 0x%x", address) - #finder = memory_handler.get_heap_finder() - # walker = finder.get_heap_walker(heap_mapping) - # if not walker: - # raise ValueError("Address is not in heap: 0x%x", address) - # _context = memory_handler.get_reverse_context() - # heap_context = _context.get_context_for_heap_walker(walker) - # return heap_context diff --git a/haystack/reverse/diff.py b/haystack/reverse/diff.py deleted file mode 100644 index abcd6c2f..00000000 --- a/haystack/reverse/diff.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function - -""" -Utils to diff two heap memory record allocations - -2015-08-01: does not work. Is not used. - -""" - -import argparse -import logging -import sys - -import os - -from haystack import argparse_utils -from haystack import dump_loader -import reversers -from haystack.reverse import context -from haystack.reverse import config -from haystack.reverse import structure - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__status__ = "Production" - - -log = logging.getLogger('diff') - - -def make(opts): - log.info('[+] Loading context of %s' % opts.dump1) - # '../../outputs/skype.1.a') # TODO - ctx = context.get_context(opts.dump1) - # refresh - if len(ctx.structures) != len(ctx.structures_addresses): - log.info( - '[+] Refreshing from %d allocators cached' % - (len( - ctx.structures))) - # FIXME, I think its now an heapwalker, not a reverser - mallocRev = reversers.MallocReverser() - ctx = mallocRev.reverse(ctx) - mallocRev.check_inuse(ctx) - log.info( - '[+] Final %d allocators from malloc blocs' % - (len( - ctx.structures))) - finder = ctx.get_memory_handler().get_heap_finder() - heap1 = finder.list_heap_walkers()[0] - log.info('[+] Loading _memory_handler of %s' % opts.dump2) - newmappings = dump_loader.load(opts.dump2) - finder2 = newmappings.get_heap_finder() - heap2 = finder2.list_heap_walkers()[0] - log.info('[+] finding diff values with %s' % opts.dump2) - addrs = cmd_cmp(heap1, heap2, heap1.start) - - # now compare with allocators addresses - structures = [] - realloc = 0 - log.info('[+] Looking at %d differences' % (len(addrs))) - st = [] - # joined iteration, found structure affected - # use info from malloc : allocators.start + .size - addr_iter = iter(addrs) - structs_addr_iter = iter(ctx.malloc_addresses) - structs_size_iter = iter(ctx.malloc_sizes) - try: - addr = addr_iter.next() - st_addr = structs_addr_iter.next() - st_size = structs_size_iter.next() - cnt = 1 - while True: - - while (addr - st_addr) >= st_size: # find st containing offset - st_addr = structs_addr_iter.next() - st_size = structs_size_iter.next() - # check for gaps - if (addr - st_addr) < 0: # went to far - no struct overlapping - # addr is in between two struct - dump all addr stuck out of - # malloc_chunks - while (addr - st_addr) < 0: - 
addr = addr_iter.next() - pass - continue - - # - # check if offset is really in st ( should be always if your not - # dumb/there no holes ) - if 0 <= (addr - st_addr) < st_size: - # tag the structure as different - structures.append(ctx.structures[st_addr]) - cnt += 1 - else: - # (addr - st_addr) < 0 # impossible by previous while - # (addr - st_addr) >= st_size # then continur - continue - - while (addr - st_addr) < st_size: # enumerate offsets in st range - addr = addr_iter.next() - cnt += 1 - except StopIteration as e: - pass - addrs_found = cnt - - log.info( - '[+] On %d diffs, found %d structs with different values. realloc: %d' % - (addrs_found, len(structures), realloc)) - log.info('[+] Outputing to file (will be long-ish)') - - print_diff_files(opts, context, newmappings, structures) - - -def print_diff_files(opts, context, newmappings, structures): - # print original struct in one file, diffed struct in the other - d1out = config.Config.getCacheFilename( - config.Config.DIFF_PY_HEADERS, '%s-%s' % - (opts.dump1, opts.dump1)) - d2out = config.Config.getCacheFilename( - config.Config.DIFF_PY_HEADERS, '%s-%s' % - (opts.dump1, opts.dump2)) - f1 = open(d1out, 'w') - f2 = open(d2out, 'w') - for st in structures: - st2 = structure.remap_load(context, st.vaddr, newmappings) - if st.bytes == st2.bytes: - print('identic bit field !!!') - return - # get the fields - # TODO FIXME , fix and leverage Field.getValue() to update from a changed mapping - # TODO, in toString(), pointer value should be in comment, to check for - # pointer change, when same pointed struct. - st.decodeFields() - #st.resolvePointers(ctx.structures_addresses, ctx.allocators) - # st._aggregateFields() - st2.reset() # clean previous state - st2.decodeFields() - #st2.resolvePointers(ctx.structures_addresses, ctx.allocators) - # st2._aggregateFields() - # write the files - f1.write(st.to_string()) - f1.write('\n') - f2.write(st2.to_string()) - f2.write('\n') - sys.stdout.write('.') - sys.stdout.flush() - print() - f1.close() - f2.close() - log.info('[+] diffed allocators dumped in %s %s' % (d1out, d2out)) - - -def cmd_cmp(heap1, heap2, baseOffset): - # LINUX based system command cmp parsing - import subprocess - - f1 = heap1._memdump.name - f2 = heap2._memdump.name - - addrs = [] - try: - res = subprocess.check_output(['cmp', f1, f2, '-l']) - except subprocess.CalledProcessError as e: - res = e.output - for line in res.split('\n'): - cols = line.split(' ') - try: - while cols[0] == '': - cols.pop(0) - except: - continue - addrs.append(int(cols.pop(0)) + baseOffset - 1) # starts with 1 - - return addrs - - -def argparser(): - rootparser = argparse.ArgumentParser( - prog='haystack-reversers-diff', - description='Diff struct of the same instance.') - rootparser.add_argument( - '--debug', - action='store_true', - help='Debug mode on.') - rootparser.add_argument( - 'dump1', - type=argparse_utils.readable, - action='store', - help='Dump file 1.') - rootparser.add_argument( - 'dump2', - type=argparse_utils.readable, - action='store', - help='Dump file 2.') - rootparser.set_defaults(func=make) - return rootparser - - -def main(argv): - parser = argparser() - opts = parser.parse_args(argv) - - level = logging.INFO - if opts.debug: - level = logging.DEBUG - - flog = os.path.normpath('log') - logging.basicConfig(level=level, filename=flog, filemode='w') - - logging.getLogger('diff').addHandler( - logging.StreamHandler( - stream=sys.stdout)) - - log.info('[+] output log to %s' % flog) - - opts.func(opts) - - -if __name__ == '__main__': 
- main(sys.argv[1:]) diff --git a/haystack/reverse/enumerators.py b/haystack/reverse/enumerators.py deleted file mode 100644 index 3f6a2dee..00000000 --- a/haystack/reverse/enumerators.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import logging -from haystack.reverse import searchers -from haystack.reverse import matchers -from haystack.utils import xrange - -log = logging.getLogger('enumerators') - -""" -performance test: - -WordAlignedEnumerator: 16028 pointers, timeit 11.74 -AllocatedWordAlignedEnumerator: 596 pointers, timeit 0.20 - -""" - -class WordAlignedEnumerator(searchers.WordAlignedSearcher): - """ - return vaddr,value - expect a boolean, value tuple from test_match - """ - def _init(self): - if not isinstance(self._matcher, matchers.AbstractMatcherWithValue): - raise TypeError("matcher should be a AbstractMatcherWithValue") - - def __iter__(self): - """ Iterate over the mapping to find all valid matches """ - mapping = self.get_search_mapping() - for i, vaddr in enumerate(xrange(mapping.start, mapping.end, self._word_size)): - self._check_steps(i) # be verbose - # expect a boolean, value tuple from testMatch - b, val = self._matcher.test_match(mapping, vaddr) - if b: - yield (vaddr, val) - return - - -class AllocatedWordAlignedEnumerator(searchers.AllocatedWordAlignedSearcher): - """ - return vaddr,value - expect a boolean, value tuple from test_match - """ - def _init(self): - if not isinstance(self._matcher, matchers.AbstractMatcherWithValue): - raise TypeError("matcher should be a AbstractMatcherWithValue") - - def __iter__(self): - """ - Iterate over the allocated chunk of this heap mapping to find all valid matches - """ - log.debug('iterate allocated chunks in %s heap mapping for matching values', self.get_search_mapping()) - mapping = self.get_search_mapping() - i = 0 - for vaddr, size in self._walker.get_user_allocations(): - self._check_steps(i) - # check head of chunk - # expect a boolean, value tuple from testMatch - b, val = self._matcher.test_match(mapping, vaddr) - if b: - yield (vaddr, val) - if size < 2*self._word_size: - continue - # check each offset in that allocated chunk - for vaddr_2 in xrange(vaddr+size, vaddr+size-self._word_size, self._word_size): - i+=1 - self._check_steps(i) - # expect a boolean, value tuple from testMatch - b, val = self._matcher.test_match(mapping, vaddr_2) - if b: - yield (vaddr_2, val) - return \ No newline at end of file diff --git a/haystack/reverse/fieldtypes.py b/haystack/reverse/fieldtypes.py deleted file mode 100644 index 1c138c71..00000000 --- a/haystack/reverse/fieldtypes.py +++ /dev/null @@ -1,362 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import logging -import ctypes - -from haystack.reverse import config -from haystack.reverse import structure - - -""" -the Python classes to represent the guesswork record and field typing of -allocations. -""" - - -log = logging.getLogger('field') - -# Field related functions and classes - - -class FieldType(object): - """ - Represents the type of a field. 
- """ - types = set() - - def __init__(self, _id, _name, _signature): - self.__id = _id - self.__name = _name - self.__sig = _signature - - @property - def id(self): - return self.__id - - @property - def name(self): - return self.__name - - @property - def signature(self): - return self.__sig - - def __lt__(self, other): - return self.id < other.id - - def __hash__(self): - return hash(self.id) - - def __str__(self): - return '' % self.name - - def __repr__(self): - return '' % self.name - - -class FieldTypeStruct(FieldType): - """ - Fields that are know independent structure. - In case we reverse a Big record that has members of known record types. - """ - - def __init__(self, _typename): - assert isinstance(_typename, str) - super(FieldTypeStruct, self).__init__(0x1, _typename, 'K') - - def __str__(self): - return self.name - - -class FieldTypeArray(FieldType): - """ - An array type - """ - def __init__(self, item_type, item_size, nb_items): - super(FieldTypeArray, self).__init__(0x60, '%s*%d' % (item_type.name, nb_items), 'a') - self.nb_items = nb_items - self.item_type = item_type - self.item_size = item_size - self.size = item_size*nb_items - - -class RecordTypePointer(FieldType): - def __init__(self, _type): - #if typ == STRING: - # return STRING_POINTER - super(RecordTypePointer, self).__init__(_type.id + 0xa, 'ctypes.POINTER(%s)' % _type.name, 'P') - - -# setup all the know types that are interesting to us -UNKNOWN = FieldType(0x0, 'ctypes.c_ubyte', 'u') -STRUCT = FieldType(0x1, 'Structure', 'K') -ZEROES = FieldType(0x2, 'ctypes.c_ubyte', 'z') -STRING = FieldType(0x4, 'ctypes.c_char', 'T') -STRING16 = FieldType(0x14, 'ctypes.c_char', 'T') -STRINGNULL = FieldType(0x6, 'ctypes.c_char', 'T') -STRING_POINTER = FieldType(0x4 + 0xa, 'ctypes.c_char_p', 's') -INTEGER = FieldType(0x18, 'ctypes.c_uint', 'I') -SMALLINT = FieldType(0x8, 'ctypes.c_uint', 'i') -SIGNED_SMALLINT = FieldType(0x28, 'ctypes.c_int', 'i') -ARRAY = FieldType(0x40, 'Array', 'a') -BYTEARRAY = FieldType(0x50, 'ctypes.c_ubyte', 'a') -# ARRAY_CHAR_P = FieldType(0x9, 'array_char_p', 'ctypes.c_char_p', 'Sp') -POINTER = FieldType(0xa, 'ctypes.c_void_p', 'P') -PADDING = FieldType(0xff, 'ctypes.c_ubyte', 'X') - - -class Field(object): - """ - Class that represent a Field instance, a FieldType instance. 
- """ - def __init__(self, name, offset, _type, size, is_padding): - self.__name = name - self.__offset = offset - assert isinstance(_type, FieldType) - self.__field_type = _type - self.__size = size - self.__padding = is_padding - self.__comment = '#' - - @property - def name(self): - return self.__name - - @name.setter - def name(self, _name): - if _name is None: - self.__name = '%s_%s' % (self.field_type.name, self.offset) - else: - self.__name = _name - - @property - def offset(self): - return self.__offset - - @property - def field_type(self): - return self.__field_type - - @property - def size(self): - return self.__size - - @property - def padding(self): - return self.__padding - - @property - def comment(self): - return self.__comment - - @comment.setter - def comment(self, txt): - self.__comment = '# %s' % txt - - def is_string(self): # null terminated - return self.field_type in [STRING, STRING16, STRINGNULL, STRING_POINTER] - - def is_pointer(self): - # we could be a pointer or a pointer string - return issubclass(self.__class__, PointerField) - - def is_zeroes(self): - return self.field_type == ZEROES - - def is_array(self): # will be overloaded - return self.field_type == ARRAY or self.field_type == BYTEARRAY - - def is_integer(self): - return self.field_type == INTEGER or self.field_type == SMALLINT or self.field_type == SIGNED_SMALLINT - - def is_record(self): - return self.field_type == STRUCT - - def is_gap(self): - return self.field_type == UNKNOWN - - def get_typename(self): - if self.is_string() or self.is_zeroes(): - return '%s*%d' % (self.field_type.name, len(self)) - elif self.is_array(): - # TODO should be in type - return '%s*%d' % (self.field_type.name, len(self) / self.nb_items) - elif self.field_type == UNKNOWN: - return '%s*%d' % (self.field_type.name, len(self)) - return self.field_type.name - - def __hash__(self): - return hash((self.offset, self.size, self.field_type)) - - # FIXME python 3 - def __lt__(self, other): - return self.offset < other.offset - - # FIXME obselete - def __cmp__(self, other): - # XXX : Perf... 
cmp sux - try: - if self.offset < other.offset: - return -1 - elif self.offset > other.offset: - return 1 - elif (self.offset, self.size, self.field_type) == (other.offset, other.size, other.field_type): - return 0 - # last chance, expensive cmp - return cmp((self.offset, self.size, self.field_type), - (other.offset, other.size, other.field_type)) - except AttributeError as e: - # if not isinstance(other, Field): - return -1 - - def __len__(self): - return int(self.size) # some long come and goes - - def __repr__(self): - return str(self) - - def __str__(self): - return '' % (self.offset, self.size, self.field_type) - - def get_signature(self): - return self.field_type, self.size - - def to_string(self, value): - if value is None: - value = 0 - if self.is_pointer(): - comment = '# @ 0x%0.8x %s' % (value, self.comment) - elif self.is_integer(): - comment = '# 0x%x %s' % (value, self.comment) - elif self.is_zeroes(): - comment = '''# %s zeroes: '\\x00'*%d''' % (self.comment, len(self)) - elif self.is_string(): - comment = '# %s %s: %s' % (self.comment, self.field_type.name, value) - elif self.is_record(): - comment = '#' - else: - # unknown - comment = '# %s else bytes:%s' % (self.comment, repr(value)) - # prep the string - fstr = "( '%s' , %s ), %s\n" % (self.name, self.get_typename(), comment) - return fstr - - -class PointerField(Field): - """ - represent a pointer field - """ - def __init__(self, name, offset, size): - super(PointerField, self).__init__(name, offset, POINTER, size, False) - self.__pointee = None - self.__pointer_to_ext_lib = False\ - # ?? - self._child_addr = 0 - self._child_desc = None - self._child_type = None - - @property - def pointee(self): - return self.__pointee - - @pointee.setter - def pointee(self, pointee_field): - self.__pointee = pointee_field - - def is_pointer_to_string(self): - # if hasattr(self, '_ptr_to_ext_lib'): - # return False - return self.pointee.is_string() - - def is_pointer_to_ext_lib(self): - return self.__pointer_to_ext_lib - - def set_pointer_to_ext_lib(self): - self.__pointer_to_ext_lib = True - - def set_pointee_addr(self, addr): - self._child_addr = addr - - def set_pointee_desc(self, desc): - self._child_desc = desc - - def set_pointee_ctype(self, _type): - self._child_type = _type - - -class ArrayField(Field): - """ - Represents an array field. - """ - # , basicTypename, basicTypeSize ): # use first element to get that info - def __init__(self, name, offset, item_type, item_size, nb_item): - size = item_size * nb_item - super(ArrayField, self).__init__(name, offset, FieldTypeArray(item_type, item_size, nb_item), size, False) - - def get_typename(self): - return self.field_type.name - - def is_array(self): - return True - - def _get_value(self, _record, maxLen=120): - return None - - def to_string(self, _record, prefix=''): - item_type = self.field_type.item_type - # log.debug('P:%s I:%s Z:%s typ:%s' % (item_type.is_pointer(), item_type.is_integer(), item_type.is_zeroes(), item_type.name)) - log.debug("array type: %s", item_type.name) - # - comment = '# %s array' % self.comment - fstr = "%s( '%s' , %s ), %s\n" % (prefix, self.name, self.get_typename(), comment) - return fstr - - -class ZeroField(ArrayField): - """ - Represents an array field of zeroes. 
- """ - def __init__(self, name, offset, nb_item): - super(ZeroField, self).__init__(name, offset, ZEROES, 1, nb_item) - - def is_zeroes(self): - return True - - -class RecordField(Field, structure.AnonymousRecord): - """ - make a record field - """ - def __init__(self, parent, offset, field_name, field_type, fields): - size = sum([len(f) for f in fields]) - _address = parent.address + offset - structure.AnonymousRecord.__init__(self, parent._memory_handler, _address, size, prefix=None) - Field.__init__(self, field_name, offset, FieldTypeStruct(field_type), size, False) - structure.AnonymousRecord.set_name(self, field_name) - #structure.AnonymousRecord.add_fields(self, fields) - _record_type = structure.RecordType(field_type, size,fields) - self.set_record_type(_record_type) - return - - def get_typename(self): - return '%s' % self.field_type - - @property - def address(self): - raise NotImplementedError('You cannot call address on a subrecord') - - -# def to_string(self, *args): -# # print self.fields -# fieldsString = '[ \n%s ]' % (''.join([field.to_string(self, '\t') for field in self.get_fields()])) -# info = 'rlevel:%d SIG:%s size:%d' % (self.get_reverse_level(), self.get_signature(), len(self)) -# ctypes_def = ''' -#class %s(ctypes.Structure): # %s -# _fields_ = %s -# -#''' % (self.name, info, fieldsString) -# return ctypes_def diff --git a/haystack/reverse/graph.py b/haystack/reverse/graph.py deleted file mode 100644 index 09e99004..00000000 --- a/haystack/reverse/graph.py +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -from __future__ import print_function - -from collections import defaultdict - -import networkx -import argparse -import logging -import os -import sys - -import matplotlib.pyplot as plt -from haystack import argparse_utils -from haystack import dump_loader -from haystack.reverse import config -from haystack.reverse import context -from haystack.reverse import utils - -""" -Graph tools to represent allocations in a graph. -That allows graph algorithms applications. 
-""" - - -log = logging.getLogger('graph') - - -def printGraph(G, gname): - h = networkx.DiGraph() - h.add_edges_from(G.edges()) - networkx.draw_graphviz(h) - fname = os.path.sep.join([config.imgCacheDir, 'graph_%s.png' % gname]) - plt.savefig(fname) - plt.clf() - fname = os.path.sep.join([config.cacheDir, 'graph_%s.gexf' % gname]) - networkx.readwrite.gexf.write_gexf(h, fname) - return - -# extract graph - - -def depthSubgraph(source, target, nodes, depth): - if depth == 0: - return - depth -= 1 - for node in nodes: - neighbors = source.successors(node) - target.add_edges_from(source.edges(node)) - depthSubgraph(source, target, neighbors, depth) - return - - -def save_graph_headers(ctx, graph, fname): - fout = open(os.path.sep.join([config.cacheDir, fname]), 'w') - towrite = [] - structs = [ctx.structures[int(addr, 16)] for addr in graph.nodes()] - for anon in structs: - print(anon) - towrite.append(anon.to_string()) - if len(towrite) >= 10000: - try: - fout.write('\n'.join(towrite)) - except UnicodeDecodeError as e: - print('ERROR on ', anon) - towrite = [] - fout.flush() - fout.write('\n'.join(towrite)) - fout.close() - return - - -def make(opts): - fname = opts.gexf - - # if __name__ == '__main__': - # if False: - #ctx = context.get_context('../../outputs/skype.1.a') - memory_handler = dump_loader.load(opts.dumpname) - - #digraph=networkx.readwrite.gexf.read_gexf( '../../outputs/skype.1.a.gexf') - digraph = networkx.readwrite.gexf.read_gexf(opts.gexf.name) - finder = memory_handler.get_heap_finder() - heap = finder.list_heap_walkers()[0] - - # only add heap structure with links - edges = [ - (x, y) for x, y in digraph.edges() if int( - x, 16) in heap and int( - y, 16) in heap] - graph = networkx.DiGraph() - graph.add_edges_from(edges) - - printGraph(graph, os.path.basename(opts.dumpname)) - - -def clean(digraph): - # clean solos - isolates = networkx.algorithms.isolate.isolates(digraph) - digraph.remove_nodes_from(isolates) - - # clean solos clusters - graph = networkx.Graph(digraph) # undirected - subgraphs = networkx.algorithms.components.connected.connected_component_subgraphs( - graph) - isolates1 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 1)) # self connected - isolates2 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 2)) - isolates3 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 3)) - digraph.remove_nodes_from(isolates1) - digraph.remove_nodes_from(isolates2) - digraph.remove_nodes_from(isolates3) - - # - #graph = digraph.to_undirected() - #subgraphs = networkx.algorithms.components.connected.connected_component_subgraphs(graph) - subgraphs = [g for g in subgraphs if len(g) > 3] - isolatedGraphs = subgraphs[1:100] - - # group by nodes number - isoDict = defaultdict(list) - [isoDict[len(g)].append(g) for g in isolatedGraphs] - - # test isomorphism - isoGraphs = dict() - for numNodes, graphs in isoDict.items(): - numgraphs = len(graphs) - if numgraphs == 1: - continue - isoGraph = networkx.Graph() - # quick find isomorphisms - todo = set(graphs) - for i, g1 in enumerate(graphs): - for g2 in graphs[i + 1:]: - if networkx.is_isomorphic(g1, g2): - print('numNodes:%d graphs %d, %d are isomorphic' % (numNodes, i, i + 1)) - isoGraph.add_edge(g1, g2, {'isomorphic': True}) - if g2 in todo: - todo.remove(g2) - if g1 in todo: - todo.remove(g1) - # we can stop here, chain comparaison will work between g2 - # and g3 - break - - if len(isoGraph) > 0: - isoGraphs[numNodes] = isoGraph - - # draw the isomorphisms - for i, item in 
enumerate(isoGraphs.items()): - num, g = item - # networkx.draw(g) - for rg in g.nodes(): - networkx.draw(rg) - fname = os.path.sep.join( - [config.imgCacheDir, 'isomorph_subgraphs_%d.png' % num]) - plt.savefig(fname) - plt.clf() - # need to use gephi-like for rendering nicely on the same pic - - bigGraph = networkx.DiGraph() - bigGraph.add_edges_from(digraph.edges(subgraphs[0].nodes())) - - stack_addrs = utils.int_array_cache( - config.get_cache_filename(config.CACHE_STACK_VALUES, ctx.dumpname, ctx._heap_addr)) - stack_addrs_txt = set(['%x' % addr - for addr in stack_addrs]) # new, no long - - stacknodes = list(set(bigGraph.nodes()) & stack_addrs_txt) - print('stacknodes left', len(stacknodes)) - orig = list(set(graph.nodes()) & stack_addrs_txt) - print('stacknodes orig', len(orig)) - - # identify strongly referenced allocators - degreesList = [(bigGraph.in_degree(node), node) - for node in bigGraph.nodes()] - degreesList.sort(reverse=True) - -# important struct - - -def printImportant(ctx, digraph, degreesList, ind, bigGraph): - nb, saddr = degreesList[ind] - addr = int(saddr, 16) - s1 = ctx.structures[addr] # TODO FIXME RAISES - # s1 = s1._load() #structure.cacheLoad(ctx, int(saddr,16)) - s1.decodeFields() - print(s1.to_string()) - # strip the node from its predecessors, they are numerously too numerous - impDiGraph = networkx.DiGraph() - root = '%d nodes' % nb - impDiGraph.add_edge(root, saddr) - depthSubgraph(bigGraph, impDiGraph, [saddr], 2) - print('important struct with %d structs pointing to it, %d pointerFields' % ( - digraph.in_degree(saddr), digraph.out_degree(saddr))) - # print 'important struct with %d structs pointing to it, %d - # pointerFields'%(impDiGraph.in_degree(saddr), - # impDiGraph.out_degree(saddr)) - fname = os.path.sep.join( - [config.imgCacheDir, 'important_%s.png' % saddr]) - networkx.draw(impDiGraph) - plt.savefig(fname) - plt.clf() - # check for children with identical sig - for node in impDiGraph.successors(saddr): - st = ctx.structures[int(node, 16)] - st.decodeFields() - # FIXME rework, usage of obselete function - st.resolvePointers() - # st.pointerResolved=True - # st._aggregateFields() - print(node, st.get_signature(text=True)) - # clean and print - # s1._aggregateFields() - impDiGraph.remove_node(root) - save_graph_headers(ctx, impDiGraph, '%s.subdigraph.py' % saddr) - return s1 - - -def deref(ctx, f): - ctx.structures[f.target_struct_addr].decodeFields() - return ctx.structures[f.target_struct_addr] - -# s1 = printImportant(0) # la structure la plus utilisee. - -# TODO -# -# get nodes with high out_degree, -# compare their successors signature, and try to find a common sig sig1 -# if sig1 , lone, sig1 , .... 
, try to fit lone in sig1 ( zeroes/pointers) -# aggregate group of successors given the common sig - -# identify chained list ( see isolatedGraphs[0] ) - -# b800dcc is a big kernel - -# print deref(sb800[7]).toString() -#>>> hex(16842753) -#'0x1010001' -> bitfield - - -# s1._aggregateFields() - -#s2 = utils.nextStructure(ctx, s1) -# s2b should start with \x00's - - -def argparser(): - rootparser = argparse.ArgumentParser( - prog='haystack-reversers-graph', - description='Play with graph repr of pointers relationships.') - rootparser.add_argument( - '--debug', - action='store_true', - help='Debug mode on.') - rootparser.add_argument( - 'gexf', - type=argparse.FileType('rb'), - action='store', - help='Source gexf.') - rootparser.add_argument( - 'dumpname', - type=argparse_utils.readable, - action='store', - help='Source gexf.') - rootparser.set_defaults(func=make) - return rootparser - - -def main(argv): - parser = argparser() - opts = parser.parse_args(argv) - - level = logging.INFO - if opts.debug: - level = logging.DEBUG - - flog = os.path.normpath('log') - logging.basicConfig(level=level, filename=flog, filemode='w') - - # logging.getLogger('haystack').setLevel(logging.INFO) - # logging.getLogger('dumper').setLevel(logging.INFO) - # logging.getLogger('structure').setLevel(logging.INFO) - # logging.getLogger('field').setLevel(logging.INFO) - # logging.getLogger('progressive').setLevel(logging.INFO) - logging.getLogger('graph').addHandler(logging.StreamHandler(stream=sys.stdout)) - - log.info('[+] output log to %s' % flog) - - opts.func(opts) - - -if __name__ == '__main__': - main(sys.argv[1:]) diff --git a/haystack/reverse/heuristics/__init__.py b/haystack/reverse/heuristics/__init__.py deleted file mode 100644 index 1be348ee..00000000 --- a/haystack/reverse/heuristics/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com" - -__doc__ = """ - Heuristics to do static type analysis from bytes. 
-""" - -#import pattern - -all = [ -] diff --git a/haystack/reverse/heuristics/constraints.py b/haystack/reverse/heuristics/constraints.py deleted file mode 100644 index 7fff665e..00000000 --- a/haystack/reverse/heuristics/constraints.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -""" -""" - -from haystack.reverse import structure - -from collections import Counter -import logging - -log = logging.getLogger('constraints') - - -class ConstraintsReverser(object): - def __init__(self, memory_handler): - self.__memory_handler = memory_handler - self.__process_context = memory_handler.get_reverse_context() - - def activate(self, _record_type, members): - # apply the fields template to all members of the list - for list_item_addr in members: - _context = self.__process_context.get_context_for_address(list_item_addr) - _item = _context.get_record_for_address(list_item_addr) - _item.set_record_type(_record_type, True) - - # push the LIST_ENTRY type into the context/memory_handler - self.__process_context.add_reversed_type(_record_type, members) - - return - - def verify(self, _record_type, members): - records = [] - lines = [] - # try to apply the fields template to all members of the list - for list_item_addr in members: - _context = self.__process_context.get_context_for_address(list_item_addr) - _item = _context.get_record_for_address(list_item_addr) - new_record = structure.AnonymousRecord(self.__memory_handler, _item.address, len(_item), prefix=None) - new_record.set_record_type(_record_type, True) - records.append(new_record) - lines.append('# instances: [%s]' % (','.join(['0x%x' % addr for addr in members]))) - - # check fields values - for i, field in enumerate(_record_type.get_fields()): - if field.is_record(): - # we ignore the subrecord. is too complicated to show. - continue - values = [] - for _item in records: - val = _item.get_value_for_field(field) - if field.is_pointer(): - values.append(hex(val)) - else: - values.append(val) - if field.is_zeroes() and len(values) == 1: - values = [0] - # ignore the field in that case. 
- continue - counter = Counter(values) - # print 'field: %s values: %s' % (field.name, counter) - lines.append('# field: %s values: %s' % (field.name, counter)) - return lines diff --git a/haystack/reverse/heuristics/data/words.100 b/haystack/reverse/heuristics/data/words.100 deleted file mode 100644 index 0891b4b6..00000000 --- a/haystack/reverse/heuristics/data/words.100 +++ /dev/null @@ -1,100 +0,0 @@ -adieux -abhors -remark -bitmap -wooded -unsold -fables -gouged -iodize -equine -pecked -doyens -carded -lazier -midget -steaks -shrewd -snippy -cyclic -antics -mattes -scrods -snotty -butler -sooths -liking -drowns -gotten -stilts -waxier -extent -revels -sporty -denser -inerts -weirdo -limned -blades -madams -breads -pitted -syrupy -events -saddle -malice -cretin -coming -razzes -choker -vaults -bygone -myriad -acumen -amebas -vendor -pelvis -sawyer -toning -kisses -burned -folded -caliph -snuffs -snored -craves -widest -darkly -conned -damper -masons -silver -worker -outset -ashier -tosses -jungle -stroke -refers -ruffed -bereft -lively -riding -rupees -sunken -savory -eerier -elates -worlds -rodent -swiped -swifts -mounds -source -dinghy -stills -siphon -sticky -duster -cloned -mewing diff --git a/haystack/reverse/heuristics/dsa.py b/haystack/reverse/heuristics/dsa.py deleted file mode 100644 index 5a4556c4..00000000 --- a/haystack/reverse/heuristics/dsa.py +++ /dev/null @@ -1,431 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import array -import collections -import logging -import numbers - -from haystack.reverse import fieldtypes -from haystack.reverse import re_string -from haystack.reverse import structure -from haystack.reverse.heuristics import model - -log = logging.getLogger('dsa') - -# fieldtypes.Field analysis related functions and classes - -def _py3_byte_compat(c): - if isinstance(c, numbers.Number): - assert(0 <= c < 256) - c = chr(c).encode() - return c - -_w = _py3_byte_compat - -class ZeroFields(model.FieldAnalyser): - """ checks for possible fields, aligned, with WORDSIZE zeros.""" - def make_fields(self, _record, offset, size): - assert(offset % self._word_size == 0) # vaddr and offset should be aligned - # log.debug('checking Zeroes') - self._typename = fieldtypes.ZEROES - self._zeroes = b'\x00' * self._word_size - - ret = self._find_zeroes(_record, offset, size) - - # TODO if its just a word, we should say its a small int. - return ret - - def _find_zeroes(self, _record, offset, size): - """ iterate over the bytes until a byte if not \x00 """ - _bytes = _record.bytes - # print 'offset:%x blen:%d'%(offset, len(bytes)) - # print repr(bytes) - assert(offset % self._word_size == 0) - # aligned_off = (offset)%self._target_platform.get_word_size() - start = offset - # if aligned_off != 0: # align to next - # start += (self._target_platform.get_word_size() - aligned_off) - # size -= (self._target_platform.get_word_size() - aligned_off) - # iterate - matches = array.array('i') - for i in range(start, start + size, self._word_size): - # PERF TODO: bytes or struct test ? 
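The scan and collation logic that follows groups word-aligned offsets of zero words into consecutive runs before turning them into ZeroField instances. A standalone sketch of the same grouping idea, using itertools.groupby (the helper name is hypothetical):

    import itertools

    def collate_runs(offsets, word_size):
        # offsets[i] - i * word_size is constant inside a run of consecutive words
        runs = []
        for _, grp in itertools.groupby(enumerate(offsets), lambda t: t[1] - t[0] * word_size):
            run = [off for _, off in grp]
            runs.append((run[0], len(run) * word_size))  # (start offset, byte length)
        return runs

    # collate_runs([0, 4, 8, 20, 24], 4) -> [(0, 12), (20, 8)]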
- # print repr(bytes[start+i:start+i+self._target_platform.get_word_size()]) - if _w(_bytes[start + i:start + i + self._word_size]) == self._zeroes: - matches.append(start + i) - # print matches - # collate - if len(matches) == 0: - return [] - # lets try to get fields - fields = [] - # first we need to collate neighbors - collates = list() - prev = matches[0] - self._word_size - x = [] - # PERF TODO: whats is algo here - for i in matches: - if i - self._word_size == prev: - x.append(i) - else: - collates.append(x) - x = [i] - prev = i - collates.append(x) - # log.debug(collates) - # we now have collated, lets create fields - for field in collates: - flen = len(field) - if flen > 1: - size = self._word_size * flen - elif flen == 1: - size = self._word_size - else: - continue - # make a field - _offset = start + field[0] - fields.append(fieldtypes.ZeroField('zerroes_%d' % _offset, _offset, size)) - # we have all fields - return fields - - -class UTF16Fields(model.FieldAnalyser): - """ - rfinds utf-16-ascii and ascii 7bit - """ - def make_fields(self, _record, offset, size): - assert(offset % self._word_size == 0) # vaddr and offset should be aligned - # log.debug('checking String') - fields = [] - _bytes = _record.bytes - while size > self._word_size: - # print 're_string.rfind_utf16(bytes, %d, %d)'%(offset,size) - # we force aligned results only. otherwise er have overlaps - index = re_string.rfind_utf16(_bytes, offset, size, False, self._word_size) - if index > -1: - _offset = offset + index - f = fieldtypes.Field('utf16_%d' % _offset, _offset, fieldtypes.STRING16, size - index, False) - # print repr(structure.bytes[f.offset:f.offset+f.size]) - fields.append(f) - size = index # reduce unknown field in prefix - else: - size -= self._word_size # reduce unknown field - # look in head - return fields - - -class PrintableAsciiFields(model.FieldAnalyser): - - """ finds printable ascii fields """ - - def make_fields(self, _record, offset, size): - # vaddr and offset should be aligned - assert(offset % self._word_size == 0) - # log.debug('checking String') - fields = [] - _bytes = _record.bytes - while size >= self._word_size: - # print 're_string.find_ascii(bytes, %d, %d)'%(offset,size) - index, ssize = re_string.find_ascii(_bytes, offset, size) - if index == 0: - _offset = offset + index - # PY3 wrapper _w - if (ssize < size) and _w(_bytes[offset + index + ssize]) == b'\x00': # space for a \x00 - ssize += 1 - f = fieldtypes.Field('strnull_%d' % _offset, _offset, fieldtypes.STRINGNULL, ssize, False) - else: - f = fieldtypes.Field('str_%d' % _offset, _offset, fieldtypes.STRING, ssize, False) - # print repr(structure.bytes[f.offset:f.offset+f.size]) - fields.append(f) - size -= ssize # reduce unknown field - offset += ssize - if ssize % self._word_size: - rest = self._word_size - ssize % self._word_size - size -= rest # goto next aligned - offset += rest - else: - size -= self._word_size # reduce unkown field - offset += self._word_size - # look in head - return fields - - -class PointerFields(model.FieldAnalyser): - """ looks at a word for a pointer value""" - def make_fields(self, _record, offset, size): - # iterate on all offsets . 
NOT assert( size == - # self._target_platform.get_word_size()) - assert(offset % self._word_size == 0) # vaddr and offset should be aligned - log.debug('checking Pointer') - _bytes = _record.bytes - fields = [] - ctypes_utils = self._target.get_target_ctypes_utils() - while size >= self._word_size: - value = ctypes_utils.unpackWord(_bytes[offset:offset + self._word_size]) - # check if pointer value is in range of _memory_handler and set self.comment to pathname value of pointer - # TODO : if bytes 1 & 3 == \x00, maybe utf16 string - if not self._memory_handler.is_valid_address(value): - size -= self._word_size - offset += self._word_size - continue - # FIXME 20151103 dont ignore it - # what will it break ? - if False: - # 20151026 - if aligned, ignore it - if value % self._target.get_word_size(): - size -= self._word_size - offset += self._word_size - continue - # we have a pointer - log.debug('checkPointer offset:%s value:%s' % (offset, hex(value))) - field = fieldtypes.PointerField('ptr_%d' % offset, offset, self._word_size) - # TODO: leverage the context._function_names - # if value in structure._context._function_names: - # field.comment = ' %s::%s' % (os.path.basename(self._memory_handler.get_mapping_for_address(value).pathname), - # structure._context._function_names[value]) - # else: - # field.comment = self._memory_handler.get_mapping_for_address(value).pathname - field.comment = self._memory_handler.get_mapping_for_address(value).pathname - - fields.append(field) - size -= self._word_size - offset += self._word_size - return fields - - -class IntegerFields(model.FieldAnalyser): - - """ looks at a word for a small int value""" - - def make_fields(self, _record, offset, size): - # iterate on all offsets . NOT assert( size == - # self._target_platform.get_word_size()) - assert(offset % self._word_size == 0) # vaddr and offset should be aligned - # log.debug('checking Integer') - my_bytes = _record.bytes - fields = [] - while size >= self._word_size: - # print 'checking >' - field = self.check_small_integers(my_bytes, offset) - if field is None: - # print 'checking <' - field = self.check_small_integers(my_bytes, offset, '>') - # we have a field smallint - if field is not None: - fields.append(field) - size -= self._word_size - offset += self._word_size - return fields - - def check_small_integers(self, my_bytes, offset, endianess='<'): - """ check for small value in signed and unsigned forms """ - data = my_bytes[offset:offset + self._word_size] - val = self._target.get_target_ctypes_utils().unpackWord(data, endianess) - # print endianess, val - if val < 0xffff: - field = fieldtypes.Field('small_int_%d' % offset, offset, fieldtypes.SMALLINT, self._word_size, False) - # FIXME - field.value = val - field.endianess = endianess - return field - # check signed int - elif (2 ** (self._word_size * 8) - 0xffff) < val: - _name = 'small_signed_int_%d' % offset - field = fieldtypes.Field(_name, offset, fieldtypes.SIGNED_SMALLINT, self._word_size, False) - # FIXME - field.value = val - field.endianess = endianess - return field - return None - - -class FieldReverser(model.AbstractReverser): - """ - Decode each record by asserting simple basic types from the byte content. - - Simple structure analyzer that leverage simple type recognition heuristics. 
- For all aligned offset, try to apply the following heuristics : - ZeroFields: if the word is null - UTF16Fields: if the offset contains utf-16 data - PrintableAsciiFields: if the offset starts a printable ascii string - IntegerFields: if the word value is small ( |x| < 65535 ) - PointerFields: if the word is a possible pointer value - - If the word content does not match these heuristics, tag the field as unknown. - - TODO: UTF16 array corrector, if utf16 field is preceded by smallint, aggregate both in utf16, - even if not aligned. - """ - REVERSE_LEVEL = 10 - - def __init__(self, memory_handler): - super(FieldReverser, self).__init__(memory_handler) - self.zero_a = ZeroFields(self._memory_handler) - self.ascii_a = PrintableAsciiFields(self._memory_handler) - self.utf16_a = UTF16Fields(self._memory_handler) - self.int_a = IntegerFields(self._memory_handler) - self.ptr_a = PointerFields(self._memory_handler) - - def reverse_record(self, _context, _record): - _record.reset() - fields, gaps = self._analyze(_record) - # _record.add_fields(fields) - # _record.add_fields(gaps) # , fieldtypes.UNKNOWN - _record_type = structure.RecordType('struct_%x' % _record.address, len(_record), fields+gaps) - _record.set_record_type(_record_type) - _record.set_reverse_level(self._reverse_level) - return _record - - def _analyze(self, _record): - slen = len(_record) - offset = 0 - # call on analyzers - fields = [] - nb = -1 - gaps = [fieldtypes.Field('unknown_0', 0, fieldtypes.UNKNOWN, len(_record), False)] - - _record.set_reverse_level(10) - - # find zeroes - # find strings - # find smallints - # find pointers - for analyser in [self.zero_a, self.utf16_a, self.ascii_a, self.int_a, self.ptr_a]: - log.debug("analyzing with %s", analyser) - for field in gaps: - if field.padding: - fields.append(field) - continue - log.debug('Using %s on %d:%d', analyser.__class__.__name__, field.offset, field.offset + len(field)) - new_fields = analyser.make_fields(_record, field.offset, len(field)) - fields.extend(new_fields) - for f1 in new_fields: - log.debug('new_field %s', f1) - # print fields - if len(fields) != nb: # fields changed, recompute the gaps - nb = len(fields) - gaps = self._make_gaps(_record, fields) - if len(gaps) == 0: - return fields, gaps - return fields, gaps - - def _make_gaps(self, _record, fields): - fields.sort() - gaps = [] - nextoffset = 0 - for i, f in enumerate(fields): - if f.offset > nextoffset: # add temp padding field - self._aligned_gaps(_record, f.offset, nextoffset, gaps) - elif f.offset < nextoffset: - log.debug(_record) - log.debug(f) - log.debug('%s < %s ' % (f.offset, nextoffset)) - log.debug(fields[i + 1]) - log.error("need to unit-test the fields gap with utf8 text") - assert False # f.offset < nextoffset # No overlaps authorised - # fields.remove(f) - # do next field - nextoffset = f.offset + len(f) - # conclude on QUEUE insertion - lastfield_size = len(_record) - nextoffset - if lastfield_size > 0: - if lastfield_size < self._word_size: - gap = fieldtypes.Field('gap_%d' % nextoffset, nextoffset, fieldtypes.UNKNOWN, lastfield_size, True) - log.debug('_make_gaps: adding last field at offset %d:%d', gap.offset, gap.offset + len(gap)) - gaps.append(gap) - else: - self._aligned_gaps(_record, len(_record), nextoffset, gaps) - return gaps - - def _aligned_gaps(self, _record, endoffset, nextoffset, gaps): - """ if nextoffset is aligned - add a gap to gaps, or - if nextoffset is not aligned - add (padding + gap) to gaps - """ - if nextoffset % self._word_size == 0: - gap =
fieldtypes.Field('gap_%d' % nextoffset, nextoffset, fieldtypes.UNKNOWN, endoffset - nextoffset, False) - log.debug('_make_gaps: adding field at offset %d:%d', gap.offset, gap.offset + len(gap)) - gaps.append(gap) - else: - # we need a field of endoffset - nextoffset bytes. - # unaligned field should be splitted - size = endoffset - nextoffset - if size < self._word_size: - s1 = size - else: - s1 = size - size % self._word_size - gap1 = fieldtypes.Field('gap_%d' % nextoffset, nextoffset, fieldtypes.UNKNOWN, s1, True) - log.debug('_make_gaps: Unaligned field at offset %d:%d', gap1.offset, gap1.offset + len(gap1)) - gaps.append(gap1) - if nextoffset + s1 < endoffset: - _offset = nextoffset + s1 - _size = endoffset - nextoffset - s1 - gap2 = fieldtypes.Field('gap_%d' % _offset, _offset, fieldtypes.UNKNOWN, _size, True) - log.debug('_make_gaps: adding field at offset %d:%d', gap2.offset, gap2.offset + len(gap2)) - gaps.append(gap2) - return - - -class TextFieldCorrection(model.AbstractReverser): - """ - Second pass on records to fix text fields. - a) utf16 could be non aligned. We look for small_int+utf16. and aggregate. - b) terminating null bytes. Due to padding there could be more than 1 byte worth. aggregate. - c) if record has one null terminated str, Rename record type as cstring. rename/retype parent pointers + comment. - """ - REVERSE_LEVEL = 11 - - def reverse_record(self, _context, _record): - fields = _record.get_fields() - if False: - # corrected in non-aligned FieldReverser - # a) utf16 could be non aligned. We look for small_int+utf16. and aggregate. - for i, f1 in enumerate(fields[:-1]): - if f1.field_type is not fieldtypes.SMALLINT: - continue - f2 = fields[i+1] - if f2.field_type is not fieldtypes.STRING16: - continue - # b) terminating null bytes. Due to padding there could be more than 1 byte worth. aggregate. - if len(fields) > 1: - f1, f2 = fields[-2:] - if f2.is_zeroes() and len(f2) == 4: - if f1.is_string() and f1.field_type == fieldtypes.STRING16: - # FIXME: DO WHAT ? aggregate ? set zerroes as padding ? - # set f2 as padding. ??? - pass - # c) if record has one null terminated str, Rename record type as cstring. - # rename/retype parent pointers + comment. - if len(fields) == 2 and fields[0].is_string() and fields[1].is_zeroes(): - _record.set_name('string') - - return _record - - -class IntegerArrayFields(model.FieldAnalyser): - """ TODO """ - - def make_fields(self, _record, offset, size): - # this should be last resort - my_bytes = _record.bytes[offset:offset + size] - size = len(my_bytes) - if size < 4: - return False - ctr = collections.Counter([my_bytes[i:i + self._word_size] for i in range(len(my_bytes))]) - floor = max(1, int(size * .1)) # 10 % variation in values - # commons = [ c for c,nb in ctr.most_common() if nb > 2 ] - commons = ctr.most_common() - if len(commons) > floor: - return False # too many different values - # few values. it migth be an array - # FIXME - # _record.values = my_bytes - # _record.comment = '10%% var in values: %s' % (','.join([repr(v) for v, nb in commons])) - return True - - diff --git a/haystack/reverse/heuristics/interfaces.py b/haystack/reverse/heuristics/interfaces.py deleted file mode 100644 index 05e4ec3b..00000000 --- a/haystack/reverse/heuristics/interfaces.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- - - -class IReverser(object): - """ - Signature for a reverser. - """ - def reverse(self): - """ - Run the reversing algorithm. 
- - :return: - """ - raise NotImplementedError(self) - - -class IContextReverser(object): - """ - Signature for a reverser. - """ - def reverse_context(self, _context): - """ - Run the reversing algorithm. - - :return: - """ - raise NotImplementedError(self) - - -class IRecordReverser(object): - """ - A class that will apply reversing heuristics on a Record scope. - """ - def reverse_record(self, _record): - """ - Run the reversing algorithm on this record - - :param _record: the target of the reverse heuristics. - :return: - """ - raise NotImplementedError(self) - - def get_reverse_level(self): - """ - Return the level of reversing that this IReverser brings to the record. - Basically help in ordering reversers between them. - """ - raise NotImplementedError(self) - - -class IFieldReverser(object): - """ - A class that will apply reversing heuristics on a Field scope. - """ - def reverse_field(self, _field): - """ - Run the reversing algorithm on this field - - :param _field: the target of the reverse heuristics. - :return: - """ - raise NotImplementedError(self) diff --git a/haystack/reverse/heuristics/model.py b/haystack/reverse/heuristics/model.py deleted file mode 100644 index ed02663b..00000000 --- a/haystack/reverse/heuristics/model.py +++ /dev/null @@ -1,184 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import logging -import time - -from haystack.abc import interfaces as hi -from haystack.reverse.heuristics import interfaces as hri -from haystack.reverse import context - - -log = logging.getLogger('model') - - -class AbstractReverser(hri.IReverser): - - REVERSE_LEVEL = 0 - - def __init__(self, _memory_handler, reverse_level=None): - if not isinstance(_memory_handler, hi.IMemoryHandler): - raise TypeError('memory_handler should be an IMemoryHandler') - self._memory_handler = _memory_handler - if reverse_level is None: - self._reverse_level = self.REVERSE_LEVEL - else: - self._reverse_level = reverse_level - self._target = self._memory_handler.get_target_platform() - self._word_size = self._target.get_word_size() - # metadata - self._t0 = self._t1 = self._nb_reversed = self._nb_from_cache = 0 - self._fout = None - self._towrite = None - - def get_reverse_level(self): - return self._reverse_level - - def _iterate_contexts(self): - """ Override to change the list of contexts """ - # for ctx in self._memory_handler.get_cached_context(): - finder = self._memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - # we need to get then either from memory_handler or from file or from scratch - for heap_walker in walkers: - ctx = context.get_context_for_address(self._memory_handler, heap_walker.get_heap_address()) - yield ctx - - def _iterate_records(self, _context): - """ Override to change the list of record for this _context """ - for _record in _context.listStructures(): - if _record.get_reverse_level() >= self.get_reverse_level(): - continue - yield _record - - def _iterate_fields(self, _context, _record): - """ Override to change the list of field for this _record """ - for _field in _record.get_fields(): - yield _field - - def reverse(self): - """ - Go over each record and call the reversing process. - Wraps around some time-based function to ease the wait. - Saves the context to cache at the end. 
- """ - log.info('[+] %s: START', self) - # run the reverser - for _context in self._iterate_contexts(): - self._t0 = time.time() - self._t1 = self._t0 - self._nb_reversed = 0 - self._nb_from_cache = 0 - # call - self.reverse_context(_context) - # save the context - _context.save() - # closing statements - total = self._nb_from_cache + self._nb_reversed - ts = time.time() - self._t0 - log.debug('[+] %s: END %d records in %2.0fs (new:%d,cache:%d)', self, total, ts, self._nb_reversed, self._nb_from_cache) - #### - return - - def reverse_context(self, _context): - """ - Go over each record and call the reversing process. - Wraps around some time-based function to ease the wait. - Saves the context to cache at the end. - """ - log.info('[+] %s: START on heap 0x%x', self, _context._heap_start) - t0 = time.time() - for _record in self._iterate_records(_context): - # call the heuristic - self.reverse_record(_context, _record) - # can call get_record_count because of loop - # #self._callback(total=_context.get_record_count()) - # closing statements - total = self._nb_from_cache + self._nb_reversed - ts = time.time() - t0 - log.debug('[+] %s: END time:%2.0fs Heap:0x%x records:%d (new:%d,cache:%d)', self, ts, _context._heap_start, ts, self._nb_reversed, self._nb_from_cache) - return - - def reverse_record(self, _context, _record): - """ - Subclass implementation of the reversing process - - Should set _reverse_level of _record. - """ - if _record.get_reverse_level() >= self.get_reverse_level(): - # ignore this record. its already reversed. - self._nb_from_cache += 1 - else: - self._nb_reversed += 1 - for _field in self._iterate_fields(_context, _record): - self.reverse_field(_context, _record, _field) - # set our new reserve level - _record.set_reverse_level(self.get_reverse_level()) - # sate the _record - _record.saveme(_context) - return - - def reverse_field(self, _context, _record, _field): - """ - Subclass implementation of the reversing process - """ - return - - def _callback(self, total): - """ callback for human use """ - # every 30 secs, print a statement, save text repr to file. 
- if time.time() - self._t1 > 30: - t1 = time.time() - rate = (t1 - self._t0) / (1 + self._nb_reversed + self._nb_from_cache) - _ttg = (total - (self._nb_from_cache + self._nb_reversed)) * rate - log.info('%2.2f seconds to go (new:%d,cache:%d)', _ttg, self._nb_reversed, self._nb_from_cache) - return - - def __str__(self): - return '<%s>' % self.__class__.__name__ - - -class WriteRecordToFile(AbstractReverser): - - def reverse_context(self, _context): - self._fout = open(_context.get_filename_cache_headers(), 'w') - self._towrite = [] - super(WriteRecordToFile, self).reverse_context(_context) - self._write() - self._fout.close() - - def reverse_record(self, _context, _record): - super(WriteRecordToFile, self).reverse_record(_context, _record) - # output headers - self._towrite.append(_record.to_string()) - - def _write(self): - self._fout.write('\n'.join(self._towrite)) - self._towrite = [] - pass - - -class FieldAnalyser(object): - """ - - """ - def __init__(self, memory_handler): - if not isinstance(memory_handler, hi.IMemoryHandler): - raise TypeError('memory_handler should be an IMemoryHandler') - self._memory_handler = memory_handler - self._target = self._memory_handler.get_target_platform() - self._word_size = self._target.get_word_size() - - def make_fields(self, structure, offset, size): - """ - @param structure: the structure object, with a bytes() - @param offset: the offset of the field to analyze - @param size: the size of said field - - @return False, or [Field(), ] - """ - raise NotImplementedError('This should be implemented.') - - diff --git a/haystack/reverse/heuristics/pointertypes.py b/haystack/reverse/heuristics/pointertypes.py deleted file mode 100644 index e6d9a81e..00000000 --- a/haystack/reverse/heuristics/pointertypes.py +++ /dev/null @@ -1,107 +0,0 @@ -# -*- coding: utf-8 -*- - -import logging - -from haystack.reverse import context -from haystack.reverse.heuristics import model -from haystack.reverse.heuristics import radare - -log = logging.getLogger("pointertypes") - - -class PointerFieldReverser(model.AbstractReverser): - """ - Identify pointer fields and their target structure. - - For all pointer fields in a structure, - try to enrich the field name with information about the child structure. - - All structure should have been Analysed, otherwise, - results are not going to be untertaining. - """ - REVERSE_LEVEL = 50 - - def __init__(self, _memory_handler): - super(PointerFieldReverser, self).__init__(_memory_handler) - # process_context = self._memory_handler.get_reverse_context() - # self.__functions_pointers = process_context.get_functions_pointers() - - def reverse_record(self, _context, _record): - """ - @returns structure, with enriched info on pointer fields. - For pointer fields value: - (-) if pointer value is in _memory_handler ( well it is... otherwise it would not be a pointer.) 
- + if value is unaligned, mark it as cheesy - + ask _memory_handler for the context for that value - - if context covers a data lib, it would give function names, .data , .text ( CodeContext ) - - if context covers a HEAP/heap extension (one context for multiple mmap possible) it would give allocators - + ask context for the target structure or code info - - if retobj is structure, enrich pointer with info - """ - # If you want to cache resolved infos, it still should be decided by - # the caller - pointer_fields = [field for field in _record.get_fields() if field.is_pointer()] - log.debug('got %d pointer fields', len(pointer_fields)) - for field in pointer_fields: - value = _record.get_value_for_field(field) - field.set_pointee_addr(value) # default - # FIXME field.set_resolved() # What ? - # + if value is unaligned, mark it as cheesy - if value % self._target.get_word_size(): - field.comment = 'Unaligned pointer value' - # + ask _memory_handler for the context for that value - try: - ctx = context.get_context_for_address(self._memory_handler, value) # no error expected. - # + ask context for the target structure or code info - except ValueError as e: - # value is a pointer, but not to a heap. - m = self._memory_handler.get_mapping_for_address(value) - # field.set_child_desc('ext_lib @%0.8x %s' % (m.start, m.pathname)) - field.set_pointer_to_ext_lib() - field.set_pointee_ctype('void') - # TODO: Function pointer ? - field.name = 'ptr_ext_lib_%d' % field.offset - # if value in self.__functions_pointers: - # size, bbs, name = self.__functions_pointers[value] - # field.name = 'func_ptr_%s_%d' % (name, field.offset) - continue - tgt = None - try: - # get enclosing structure @throws KeyError - tgt = ctx.get_record_at_address(value) - # there is no child structure member at pointed value. - except (IndexError, ValueError) as e: - log.debug('there is no child structure enclosing pointed value %0.8x - %s', value, e) - field.set_pointee_desc('MemoryHandler management space') - field.set_pointee_ctype('void') - field.name = 'ptr_void_%d' % field.offset - continue - # structure found - ## log.debug('Looking at child id:0x%x str:%s', tgt.address, tgt.to_string()) - # we always point on structure, not field - field.set_pointee_addr(tgt.address) - offset = value - tgt.address - try: - tgt_field = tgt.get_field_at_offset(offset) # @throws IndexError - except IndexError as e: - # there is no field right there - log.debug('there is no field at pointed value %0.8x. May need splitting byte field - %s', value, e) - field.set_pointee_desc('Badly reversed field') - field.set_pointee_ctype('void') - field.name = 'ptr_void_%d' % field.offset - continue - # do not put exception for field 0. structure name should appears - # anyway. 
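The enrichment above resolves a pointer value to its enclosing record, then to the field at the interior offset, and falls back to 'void' when either lookup fails. Condensed into a standalone sketch, reusing the context API calls seen above (the helper name is hypothetical):

    def describe_pointee(ctx, value):
        try:
            tgt = ctx.get_record_at_address(value)       # enclosing record
        except (IndexError, ValueError):
            return 'void'                                # not inside a reversed record
        try:
            tgt_field = tgt.get_field_at_offset(value - tgt.address)
        except IndexError:
            return 'void'                                # badly reversed field
        return '%s.%s' % (tgt.name, tgt_field.name)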
field.set_pointee_desc('%s.%s' % (tgt.name, tgt_field.name)) - # TODO: - # do not complexify code by handling target field type, - # let's start with simple structure type pointer, - # later we would need to use tgt_field.ctypes depending on field - # offset - field.set_pointee_ctype(tgt.name) - # field.name = '%s_%s_%d' % (tgt.name, tgt_field.name, field.offset) - field.name = 'ptr_%s_%d' % (tgt.name, field.offset) - # all - - _record.set_reverse_level(self._reverse_level) - return diff --git a/haystack/reverse/heuristics/radare.py b/haystack/reverse/heuristics/radare.py deleted file mode 100644 index f7e0bd1f..00000000 --- a/haystack/reverse/heuristics/radare.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- - -# git clone https://github.com/radare/radare2.git -from __future__ import print_function -import logging - -#import r2pipe - -log = logging.getLogger('radare') - - -class RadareAnalysis(object): - """ - Use radare2 to get more info about non-heap mappings. - """ - def __init__(self, memory_handler): - self._memory_handler = memory_handler - self.functions = {} - - def init_all_functions(self): - for a_map in self._memory_handler.get_mappings(): - self.find_functions(a_map) - - def find_functions(self, mapping): - fname = mapping._memdumpname - log.debug('Opening %s', fname) - # FIXME is that even useful - import r2pipe - r2 = r2pipe.open(fname) - r2.cmd("aaa") - analysis = r2.cmd("afl") - print(analysis) - res = analysis.split('\n') - log.debug("len %d - %d", len(analysis), len(res)) - #if len(analysis) > 40: - # import pdb - # pdb.set_trace() - nb = 0 - for f_line in res: - if "0x" not in f_line: - continue - addr, size, bbs, name = f_line.split(' ') - addr = int(addr, 16) - if addr == 0x0: - continue - size = int(size) - bbs = int(bbs) - self.functions[mapping.start+addr] = (size, bbs, name) - nb += 1 - log.debug('Found %d functions in 0x%x', nb, mapping.start) \ No newline at end of file diff --git a/haystack/reverse/heuristics/reversers.py b/haystack/reverse/heuristics/reversers.py deleted file mode 100644 index 7b5ade6f..00000000 --- a/haystack/reverse/heuristics/reversers.py +++ /dev/null @@ -1,680 +0,0 @@ -# -*- coding: utf-8 -*- - -import logging -import os -import struct -import sys -import time - -from haystack.reverse import config -from haystack.reverse import context -from haystack.reverse import fieldtypes -from haystack.reverse import pattern -from haystack.reverse import structure -from haystack.reverse import utils -from haystack.reverse.heuristics import model -from haystack.reverse.heuristics import signature -from past.builtins import long - -""" -BasicCachingReverser: - use heapwalker to organise heap user allocation chunks into raw records. - -AbstractRecordReverser: - Implement this class when you are delivering an IRecordReverser - The reverse method will iterate on all records in a context and call reverse_record - -FieldReverser: - Decode each structure by asserting simple basic types from the byte content. - Text, Pointers, Integers... - -PointerFieldReverser: - Identify pointer fields and their target structure. - -DoubleLinkedListReverser: - Identify double Linked list. ( list, vector, ... ) - -PointerGraphReverser: - use the pointer relation between records to map a graph. - -save_headers: - Save the python class code definition to file. - -reverse_instances: - # we use common allocators to find allocators.
- use DoubleLinkedListReverser to try to find some double linked lists records - use FieldReverser to decode bytes contents to find basic types - use PointerFieldReverser to identify pointer relation between allocators - use PointerGraphReverser to graph pointer relations between allocators - save guessed records' python code definition to file -""" - -log = logging.getLogger('reversers') - - -class BasicCachingReverser(model.AbstractReverser): - """ - Uses heapwalker to get user allocations into allocators in cache. - This reverser should be use as a first step in the reverse process. - """ - - REVERSE_LEVEL = 1 - - def _iterate_records(self, _context): - for x in enumerate(zip(map(long, self._allocations), map(long, _context.list_allocations_sizes()))): - yield x - - def reverse_context(self, _context): - log.info('[+] Reversing user allocations into cache') - self._loaded = 0 - self._unused = 0 - # FIXME why is that a LIST ????? - self._done_records = _context._structures.keys() - self._allocations = _context.list_allocations_addresses() - # - self._todo = sorted(set(self._allocations) - set(self._done_records)) - self._fromcache = len(self._allocations) - len(self._todo) - log.info('[+] Adding new raw allocators from user allocations - %d todo', len(self._todo)) - super(BasicCachingReverser, self).reverse_context(_context) - - def reverse_record(self, _context, _record): - i, (ptr_value, size) = _record - if ptr_value in self._done_records: - sys.stdout.write('.') - sys.stdout.flush() - return - self._loaded += 1 - if size < 0: - log.error("Negative allocation size") - raise ValueError("Negative allocation size") - mystruct = structure.AnonymousRecord(_context.memory_handler, ptr_value, size) - _context._structures[ptr_value] = mystruct - # cache to disk - mystruct.saveme(_context) - return - - -class KnownRecordTypeReverser(model.AbstractReverser): - """ - Use the list of record type name provided to try to identify know records. - - The ProcessContext model must have been loaded with the appropriate ctypes module - memory_handler.get_reverse_context().get_model().import_module(*args) - then any ctypes.Structure or Union of these module can be searched for. - - This reverser should be use as a second step in the reverse process. - """ - - REVERSE_LEVEL = 2 - - def __init__(self, _memory_handler, record_names, record_constraints): - super(KnownRecordTypeReverser, self).__init__(_memory_handler) - self._process_context = self._memory_handler.get_reverse_context() - self.__record_names = record_names - self.__constraints = record_constraints - self.__search_results = {} - - def _iterate_records(self, _context): - for x in self.__record_names: - yield x - - def reverse_record(self, _context, record_name): - """ - _record is actually a record_type_name - """ - from haystack.search import api - modulename, sep, classname = record_name.rpartition('.') - module = self._memory_handler.get_model().import_module(modulename) - record_type = getattr(module, classname) - # now launch the search - results = api.search_record(self._memory_handler, record_type, self.__constraints, False) - - addresses = [addr for _, addr in results] - self.__search_results[record_name] = addresses - return - - def get_search_results(self): - return self.__search_results - - -class DoubleLinkedListReverser(model.AbstractReverser): - """ - Identify double Linked list. ( list, vector, ... ) - - All allocation in the list must have the same size. - All pointer field should be at the same offset. 
- - FIXME: make a "KnownRecordTyepReverser" - That can apply on the full allocated chunk or a subsets of fields. - - Use a LIST_ENTRY in that reverser to replace this. - - class LIST_ENTRY(ctypes.Structure): - _fields_ = [('Next', ctypes.POINTER(LIST_ENTRY)), - ('Back', ctypes.POINTER(LIST_ENTRY))] - - we also need advanced constraints in the search API to be able to check for next_back == current ... - """ - - REVERSE_LEVEL = 30 - - def __init__(self, _memory_handler): - super(DoubleLinkedListReverser, self).__init__(_memory_handler) - self.found = 0 - self.members = set() - self.lists = {} - self._process_context = self._memory_handler.get_reverse_context() - - def _is_record_address_in_lists(self, address, field_offset, record_size): - # there could be multiple list of record of same length, - # with list entry fields at the same offset - # NOT extend - if record_size in self.lists: - if field_offset in self.lists[record_size]: - for members in self.lists[record_size][field_offset]: - if address in members: - return True - return False - - def _add_new_list(self, field_offset, record_size, list_items): - if record_size not in self.lists: - self.lists[record_size] = {} - if field_offset not in self.lists[record_size]: - self.lists[record_size][field_offset] = [] - # there could be multiple list of record of same length, - # with list entry fields at the same offset - # NOT extend - self.lists[record_size][field_offset].append(list_items) - return - - def reverse_record(self, _context, _record): - """ - Check if we find a LIST_ENTRY construct basically at every field. - Returns fast if _record's reverse level is over this one. - """ - # FIXME, we should check any field offset where a,b is a couple of pointer to the same type - if _record.get_reverse_level() >= self.get_reverse_level(): - # ignore this record. its already reversed. - self._nb_from_cache += 1 - else: - # we will at least only try around valid pointerfields. - for _field in _record.get_fields()[:-1]: - if _field.is_pointer(): - self.reverse_field(_context, _record, _field) - self._nb_reversed += 1 - return - - def reverse_field(self, _context, _record, _field): - """ - Check if we find a LIST_ENTRY construct basically at this field/word + the next one. - Returns fast if this field's is already part of a list. - """ - offset = _field.offset - ptr_value = _field.offset + _record.address - size = len(_record) - # check if the ptr is a known member at this offset - if self._is_record_address_in_lists(_record.address, offset, len(_record)): - self._nb_from_cache += 1 - elif self.is_linked_list_member(_context, ptr_value, offset, size): - # _members will contain record's address for this offset, back and next. - head_addr, _members = self.iterate_list(_context, ptr_value, offset, size) - if _members is not None: - self._add_new_list(offset, len(_record), _members) - self._nb_reversed += len(_members) - self.found += 1 - log.debug('0x%x is a linked_list_member in a list of %d members', head_addr, len(_members)) - else: - log.debug('Iterate_list returned no list members') - else: - log.debug('0x%x is not a linked_list_member', ptr_value) - - def is_linked_list_member(self, _context, ptr_value, offset, size): - """ - Checks if this address hold a DoubleLinkedPointer record with forward and backward pointers. 
- with b=ptr_value-offset, pointers are valid for a->b<-c - - Check that _next and _back are valid record in heap - :param ptr_value: - :return: - """ - _next, _back = self.get_two_pointers(_context, ptr_value) - if (_next == ptr_value) or (_back == ptr_value): - # this are self pointers that could be a list head or end - log.debug('Either f1(%s) or f2(%s) points to self', _next == ptr_value, _back == ptr_value) - return False - tn = _context.is_known_address(_next-offset) - tb = _context.is_known_address(_back-offset) - if not (tn and tb): - # at least one pointer value is dangling. - log.debug('Either Next(%s) or Back(%s) ptr are not records in heap', tn, tb) - return False - # classic LIST_ENTRY - # log.debug('Next and Back are pointing to known records fields') - # get next and prev in the same HEAP - _next_next, _next_back = self.get_two_pointers(_context, _next) - _back_next, _back_back = self.get_two_pointers(_context, _back) - # check if the three pointer work - cbn = (ptr_value == _next_back) - cnb = (ptr_value == _back_next) - if not (cbn and cnb): - log.debug('ptr->next->previous not met on cbn(%s) or cnb(%s)', cbn, cnb) - return False - # checking the size of the items - if len(_context.get_record_for_address(_next-offset)) != size: - log.debug('ptr->next size != %s', size) - return False - if len(_context.get_record_for_address(_back-offset)) != size: - log.debug('ptr->back size != %s', size) - return False - return True - - def get_two_pointers(self, _context, st_addr, offset=0): - """ - Read two words from an address as to get 2 pointers out. - usually that is what a double linked list structure is. - """ - # TODO add PEP violation fmt ignore. get_word_type_char returns a str() - fmt = str(self._target.get_word_type_char()*2) - m = _context.memory_handler.get_mapping_for_address(st_addr + offset) - _bytes = m.read_bytes(st_addr + offset, 2 * self._target.get_word_size()) - return struct.unpack(fmt, _bytes) - - def iterate_list(self, _context, _address, offset, size): - """ - Iterate the list starting at _address. 
- - Given list: a <-> b <-> c <-> d - _address is either b or c - We will return a,b,c,d - - :param _address: - :return: - """ - # FIXME, we are missing a and d - if not self.is_linked_list_member(_context, _address, offset, size): - return None, None - ends = [] - members = [_address-offset] - _next, _back = self.get_two_pointers(_context, _address) - current = _address - # check that a->_address<->_next<-c are part of the list - while self.is_linked_list_member(_context, _next, offset, size): - if _next-offset in members: - log.debug('loop from 0x%x to member 0x%x', current-offset, _next-offset) - break - members.append(_next-offset) - _next, _ = self.get_two_pointers(_context, _next) - current = _next - # we found an end - ends.append((current, 'Next', _next)) - if _next-offset not in members: - members.append(_next-offset) - - # now the other side - current = _address - while self.is_linked_list_member(_context, _back, offset, size): - if _back-offset in members: - log.debug('loop from 0x%x to member 0x%x', current-offset, _back-offset) - break - members.insert(0, _back-offset) - _, _back = self.get_two_pointers(_context, _back) - current = _back - # we found an end - ends.append((current, 'Back', _back)) - if _back-offset not in members: - members.insert(0, _back-offset) - - log.debug('head:0x%x members:%d tail:0x%x', current, len(members), ends[0][0]) - #for m in members: - # print hex(m), '->', - #print - return current-offset, members - - def find_common_type_signature(self, members): - rev = signature.CommonTypeReverser(self._memory_handler, members) - rev.reverse() - best_sig, best_addr = rev.calculate() - return best_addr - - def rename_record_type(self, _members, offset): - """ - Change the type of the 2 pointers to a substructure. - Rename the field to reflect this . - Rename the _record ? - - :param _context: - :param _members: - :param offset: - :param head_addr: - :return: - """ - # we look at each item and get the most common signature between all items - best_member = self.find_common_type_signature(_members) - - # use member[1] instead of head, so that we have a better chance for field types. - # in head, back pointer is probably a zero value, not a pointer field type. - _context = self._process_context.get_context_for_address(best_member) - _record = _context.get_record_for_address(best_member) - # we need two pointer fields to create a substructure. - ## Check if field at offset is a pointer, If so change it name, otherwise split - old_next = _record.get_field_at_offset(offset) - old_back = _record.get_field_at_offset(offset+self._word_size) - # - next_field = fieldtypes.PointerField('Next', 0, self._word_size) - back_field = fieldtypes.PointerField('Back', self._word_size, self._word_size) - sub_fields = [next_field, back_field] - # make a substructure - new_field = fieldtypes.RecordField(_record, offset, 'list', 'LIST_ENTRY', sub_fields) - fields = [x for x in _record.get_fields()] - fields.remove(old_next) - if old_next == old_back: - # its probably a LIST_ENTRY btw. 
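For reference, the Next/Back pair that rename_record_type folds into a 'LIST_ENTRY' substructure matches the self-referential ctypes declaration quoted in the DoubleLinkedListReverser docstring; as runnable code it needs the two-step definition:

    import ctypes

    class LIST_ENTRY(ctypes.Structure):
        pass

    # self-referential pointer fields can only be declared once the class exists
    LIST_ENTRY._fields_ = [('Next', ctypes.POINTER(LIST_ENTRY)),
                           ('Back', ctypes.POINTER(LIST_ENTRY))]

    assert ctypes.sizeof(LIST_ENTRY) == 2 * ctypes.sizeof(ctypes.c_void_p)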
- log.debug("old_next == old_back, aborting") - return _record.record_type - fields.remove(old_back) - fields.append(new_field) - fields.sort() - - # create a new type - head_addr = _members[0] - _record_type = structure.RecordType('list_%x' % head_addr, len(_record), fields) - log.debug("Created Record Type %s", _record_type.to_string()) - - # apply the fields template to all members of the list - for list_item_addr in _members: - _context = self._process_context.get_context_for_address(list_item_addr) - _item = _context.get_record_for_address(list_item_addr) - ### KEEP THIS - if len(_item) != len(_record): - log.warning("x2 linked reverser: len(_item) != len(_record)") - else: - _item.set_record_type(_record_type, True) - - # push the LIST_ENTRY type into the context/memory_handler - rev_context = self._memory_handler.get_reverse_context() - rev_context.add_reversed_type(_record_type, _members) - - # change the list_head name back - _context = self._process_context.get_context_for_address(head_addr) - _context.get_record_for_address(head_addr).set_name('list_head') - return _record_type - - def debug_lists(self): - for size, v in self.lists.items(): - log.debug("Lists of items of size %d: %d lists", size, len(v)) - for offset, res in v.items(): - log.debug("\tLists at offset %d: %d lists", offset, len(res)) - for _list in res: - log.debug("%s items:\t[%s]", len(_list), ','.join([hex(addr) for addr in _list])) - - def rename_all_lists(self): - # rename all lists - for size, offset_lists in self.lists.items(): - for offset, multiple_lists in offset_lists.items(): - for members_list in multiple_lists: - nb = len(members_list) - rt = self.rename_record_type(members_list, offset) - log.debug('%d members for : %s', nb, rt.to_string()) - - -class PointerGraphReverser(model.AbstractReverser): - """ - use the pointer relation between structure to map a graph. - """ - REVERSE_LEVEL = 150 - - def __init__(self, _memory_handler): - super(PointerGraphReverser, self).__init__(_memory_handler) - import networkx - self._master_graph = networkx.DiGraph() - self._heaps_graph = networkx.DiGraph() - self._graph = None - - def reverse(self): - super(PointerGraphReverser, self).reverse() - import networkx - dumpname = self._memory_handler.get_name() - outname1 = os.path.sep.join([config.get_cache_folder_name(dumpname), config.CACHE_GRAPH]) - outname2 = os.path.sep.join([config.get_cache_folder_name(dumpname), config.CACHE_GRAPH_HEAP]) - - log.info('[+] Process Graph == %d Nodes', self._master_graph.number_of_nodes()) - log.info('[+] Process Graph == %d Edges', self._master_graph.number_of_edges()) - networkx.readwrite.gexf.write_gexf(self._master_graph, outname1) - log.info('[+] Process Heaps Graph == %d Nodes', self._heaps_graph.number_of_nodes()) - log.info('[+] Process Heaps Graph == %d Edges', self._heaps_graph.number_of_edges()) - networkx.readwrite.gexf.write_gexf(self._heaps_graph, outname2) - return - - def reverse_context(self, _context): - import networkx - # we only need the addresses... 
- self._graph = networkx.DiGraph() - t0 = time.time() - tl = t0 - context_heap = hex(_context._heap_start) - for _record in _context.listStructures(): - # in all case - self._graph.add_node(hex(_record.address), heap=context_heap, weight=len(_record)) - self._master_graph.add_node(hex(_record.address), heap=context_heap, weight=len(_record)) - self._heaps_graph.add_node(hex(_record.address), heap=context_heap, weight=len(_record)) - self.reverse_record(_context, _record) - # output headers - # - log.info('[+] Heap %s Graph += %d Nodes', context_heap, self._graph.number_of_nodes()) - log.info('[+] Heap %s Graph += %d Edges', context_heap, self._graph.number_of_edges()) - networkx.readwrite.gexf.write_gexf(self._graph, _context.get_filename_cache_graph()) - ## - return - - def reverse_record(self, heap_context, _record): - ptr_value = _record.address - # targets = set(( '%x'%ptr_value, '%x'%child.target_struct_addr ) - # for child in struct.getPointerFields()) #target_struct_addr - # target_struct_addr - - pointer_fields = [f for f in _record.get_fields() if f.is_pointer()] - for f in pointer_fields: - pointee_addr = f._child_addr - # we always feed these two - # TODO: if a Node is out of heap/segment, replace it by a virtual node & color representing - # the foreign heap/segment - self._graph.add_edge(hex(_record.address), hex(pointee_addr)) - # add a colored node - self._master_graph.add_edge(hex(_record.address), hex(pointee_addr)) - # but we only feed the heaps graph if the target is known - heap = self._memory_handler.get_mapping_for_address(pointee_addr) - try: - heap_context = context.get_context_for_address(self._memory_handler, pointee_addr) - except ValueError as e: - continue - #heap_context = self._memory_handler.get_reverse_context().get_context_for_heap(heap) - if heap_context is None: - continue - # add a heap color - context_heap = hex(heap_context._heap_start) - self._graph.add_node(hex(pointee_addr), heap=context_heap) - self._master_graph.add_node(hex(pointee_addr), heap=context_heap) - self._heaps_graph.add_node(hex(pointee_addr), heap=context_heap) - try: - pointee = heap_context.get_record_at_address(pointee_addr) - except IndexError as e: - continue - except ValueError as e: - continue - self._heaps_graph.add_edge(hex(_record.address), hex(pointee_addr)) - # add a weight - self._graph.add_node(hex(pointee_addr), weight=len(_record)) - self._master_graph.add_node(hex(pointee_addr), weight=len(_record)) - self._heaps_graph.add_node(hex(pointee_addr), weight=len(_record)) - return - - def load_process_graph(self): - import networkx - dumpname = self._memory_handler.get_name() - fname = os.path.sep.join([config.get_cache_folder_name(dumpname), config.CACHE_GRAPH]) - my_graph = networkx.readwrite.gexf.read_gexf(fname) - return my_graph - - - -class ArrayFieldsReverser(model.AbstractReverser): - """ - Aggregate fields of similar type into arrays in the record. - - Check d4008 in zeus. nice array - d2008 is a major player - 90688 too - """ - REVERSE_LEVEL = 200 - - def reverse_record(self, _context, _record): - """ - Aggregate fields of similar type into arrays in the record. 
- """ - if _record.get_reverse_level() < 30: - raise ValueError('The record reverse level needs to be >30') - - log.debug('0x%x: %s', _record.address, _record.get_signature_text()) - - _record._dirty = True - - _record._fields.sort() - myfields = [] - - signature = _record.get_signature() - pencoder = pattern.PatternEncoder(signature, minGroupSize=3) - patterns = pencoder.makePattern() - - #txt = self.getSignature(text=True) - #log.warning('signature of len():%d, %s'%(len(txt),txt)) - #p = pattern.findPatternText(txt, 2, 3) - # log.debug(p) - - #log.debug('aggregateFields came up with pattern %s'%(patterns)) - - # pattern is made on FieldType, - # so we need to dequeue self.fields at the same time to enqueue in - # myfields - for nb, fieldTypesAndSizes in patterns: - # print 'fieldTypesAndSizes:',fieldTypesAndSizes - if nb == 1: - fieldType = fieldTypesAndSizes[0] # its a tuple - field = _record._fields.pop(0) - myfields.append(field) # single el - #log.debug('simple field:%s '%(field) ) - # array of subtructure DEBUG XXX TODO - elif len(fieldTypesAndSizes) > 1: - log.debug('substructure with sig %s' % fieldTypesAndSizes) - myelements = [] - for i in range(nb): - fields = [ _record._fields.pop(0) for i in range(len(fieldTypesAndSizes))] # nb-1 left - #otherFields = [ self.fields.pop(0) for i in range((nb-1)*len(fieldTypesAndSizes)) ] - # need global ref to compare substructure signature to - # other anonstructure - firstField = fieldtypes.RecordField(_record, fields[0].offset, 'unk', 'typename', fields) - myelements.append(firstField) - array = fieldtypes.ArrayField(myelements) - myfields.append(array) - #log.debug('array of structure %s'%(array)) - elif len(fieldTypesAndSizes) == 1: # make array of elements or - log.debug("found array of %s", _record._fields[0].typename.basename) - fields = [_record._fields.pop(0) for i in range(nb)] - array = fieldtypes.ArrayField(fields) - myfields.append(array) - #log.debug('array of elements %s'%(array)) - else: # TODO DEBUG internal struct - raise ValueError("fields patterns len is incorrect %d" % len(fieldTypesAndSizes)) - - log.debug('done with aggregateFields') - _record.reset() - # _record.add_fields(myfields) - _record_type = structure.RecordType('struct_%x' % _record.address, len(_record), myfields) - _record.set_record_type(_record_type) - _record.set_reverse_level(self._reverse_level) - # print 'final', self.fields - log.debug('0x%x: %s', _record.address, _record.get_signature_text()) - return - - -class InlineRecordReverser(model.AbstractReverser): - """ - Detect record types in a large one . 
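
# [Editor's sketch - not part of the diff] The array-aggregation idea used by
# ArrayFieldsReverser above, in miniature: collapse runs of identically-typed
# fields into one array. itertools.groupby stands in for PatternEncoder, which
# additionally detects repeating multi-field motifs.
import itertools

signature = ['int', 'int', 'int', 'int', 'ptr', 'char', 'char', 'char']
aggregated = [(ftype, len(list(group)))
              for ftype, group in itertools.groupby(signature)]
assert aggregated == [('int', 4), ('ptr', 1), ('char', 3)]
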
- """ - REVERSE_LEVEL = 200 - - def reverse_record(self, _context, _record): - if not _record.resolvedPointers: - raise ValueError('I should be resolved') - _record._dirty = True - _record._fields.sort() - myfields = [] - - signature = _record.get_type_signature() - pencoder = pattern.PatternEncoder(signature, minGroupSize=2) - patterns = pencoder.makePattern() - - txt = _record.get_type_signature(text=True) - p = pattern.findPatternText(txt, 1, 2) - - log.debug('substruct typeSig: %s' % txt) - log.debug('substruct findPatterntext: %s' % p) - log.debug('substruct came up with pattern %s' % patterns) - - # pattern is made on FieldType, - # so we need to dequeue _record.fields at the same time to enqueue in - # myfields - for nb, fieldTypes in patterns: - if nb == 1: - field = _record._fields.pop(0) - myfields.append(field) # single el - # log.debug('simple field:%s '%(field) ) - elif len(fieldTypes) > 1: # array of subtructure DEBUG XXX TODO - log.debug('fieldTypes:%s' % fieldTypes) - log.debug('substructure with sig %s', ''.join([ft.sig[0] for ft in fieldTypes])) - myelements = [] - for i in range(nb): - fields = [_record._fields.pop(0) for i in range(len(fieldTypes))] # nb-1 left - # otherFields = [ _record.fields.pop(0) for i in range((nb-1)*len(fieldTypesAndSizes)) ] - # need global ref to compare substructure signature to - # other anonstructure - firstField = fieldtypes.RecordField(_record, fields[0].offset, 'unk', 'typename', fields) - myelements.append(firstField) - array = fieldtypes.ArrayField(myelements) - myfields.append(array) - # log.debug('array of structure %s'%(array)) - # make array of elements obase on same base type - elif len(fieldTypes) == 1: - log.debug('found array of %s', _record._fields[0].typename.basename) - fields = [_record._fields.pop(0) for i in range(nb)] - array = fieldtypes.ArrayField(fields) - myfields.append(array) - # log.debug('array of elements %s'%(array)) - else: # TODO DEBUG internal struct - raise ValueError( - 'fields patterns len is incorrect %d' % - (len(fieldTypes))) - - log.debug('done with findSubstructure') - _record._fields = myfields - # print 'final', _record.fields - return - - -class StringsReverser(model.AbstractReverser): - """ - Detect record types in a large one . - """ - REVERSE_LEVEL = 500 - - def reverse_context(self, _context): - self.fout = open(_context.get_filename_cache_strings(), 'w') - super(StringsReverser, self).reverse_context(_context) - self.fout.close() - - def reverse_record(self, _context, _record): - for field in _record.get_fields(): - addr = _record.address + field.offset - if field.is_string(): - maxlen = len(field) - value = _record.get_value_for_field(field, maxlen+10) - self.fout.write("0x%x,%d,%s\n" % (addr, maxlen, value)) diff --git a/haystack/reverse/heuristics/signature.py b/haystack/reverse/heuristics/signature.py deleted file mode 100644 index d18dd05b..00000000 --- a/haystack/reverse/heuristics/signature.py +++ /dev/null @@ -1,684 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function -import itertools -import ctypes -import logging -import struct - -import os -import re -import Levenshtein # seqmatcher ? 
-import networkx -import numpy - -from haystack.reverse import config -import haystack.reverse.matchers -from haystack.utils import xrange -from haystack.reverse import searchers -from haystack.reverse import utils -from haystack.reverse import structure -from haystack.reverse.heuristics import dsa -from haystack.reverse.heuristics import model - -""" -Tools around guessing a field' type and -creating signature for record to compare them. -""" - - -log = logging.getLogger('signature') - - -class TypeReverser(model.AbstractReverser): - """ - """ - REVERSE_LEVEL = 300 - - def __init__(self, memory_handler): - super(TypeReverser, self).__init__(memory_handler) - self._signatures = [] - - def reverse_context(self, _context): - """ - Go over each record and call the reversing process. - Wraps around some time-based function to ease the wait. - Saves the context to cache at the end. - """ - import Levenshtein - log.debug("Gathering all signatures") - for _record in _context.listStructures(): - self._signatures.append((len(_record), _record.address, _record.get_signature_text())) - self._nb_reversed += 1 - self._callback(1) ## FIXME - ## - self._similarities = [] - for i, (size1, addr1, el1) in enumerate(self._signatures[:-1]): - log.debug("Comparing signatures with %s", el1) - for size2, addr2, el2 in self._signatures[i + 1:]: - if abs(size1 - size2) > 4*self._word_size: - continue - lev = Levenshtein.ratio(el1, el2) # seqmatcher ? - if lev > 0.75: - #self._similarities.append( ((addr1,el1),(addr2,el2)) ) - self._similarities.append((addr1, addr2)) - # we do not need the signature. - # check for chains - # TODO we need a group maker with an iterator to push group - # proposition to the user - log.debug('\t[-] Signatures done.') - - for _record in _context.listStructures(): - # do the changes. - self.reverse_record(_context, _record) - #self._callback() - - _context.save() - return - - def persist(self, _context): - outdir = _context.get_folder_cache() - config.create_cache_folder(outdir) - # - outname = _context.get_filename_cache_signatures() - #outname = os.path.sep.join([outdir, self._name]) - ar = utils.int_array_save(outname, self._similarities) - return - - def load(self, _context): - inname = _context.get_filename_cache_signatures() - self._similarities = utils.int_array_cache(inname) - return - - def reverse_record(self, _context, _record): - # TODO: add minimum reversing level check before running - # writing to file - # for ptr_value,anon in context.allocators.items(): - #self._pfa.analyze_fields(_record) - sig = _record.get_signature() - address = _record.address - _record.set_reverse_level(self._reverse_level) - return - - -class CommonTypeReverser(model.AbstractReverser): - """ - From a list of records addresse, find the most common signature. 
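
# [Editor's sketch - not part of the diff] The pairwise comparison performed
# by TypeReverser.reverse_context above, on toy signatures. Levenshtein.ratio
# comes from the python-Levenshtein package; 0.75 is the threshold the
# reverser uses, and the size filter mirrors its 4-word cutoff.
import Levenshtein

signatures = [(12, 0x1000, 'PPIIZ'), (12, 0x2000, 'PPIIZ'), (16, 0x3000, 'TTTTZ')]
word_size = 4
similarities = []
for i, (size1, addr1, sig1) in enumerate(signatures[:-1]):
    for size2, addr2, sig2 in signatures[i + 1:]:
        if abs(size1 - size2) > 4 * word_size:
            continue
        if Levenshtein.ratio(sig1, sig2) > 0.75:
            similarities.append((addr1, addr2))
assert similarities == [(0x1000, 0x2000)]
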
- """ - REVERSE_LEVEL = 31 - - def __init__(self, memory_handler, members): - super(CommonTypeReverser, self).__init__(memory_handler) - self._members = members - self._members_by_context = {} - process_context = self._memory_handler.get_reverse_context() - # organise the list - for record_addr in self._members: - heap_context = process_context.get_context_for_address(record_addr) - if heap_context not in self._members_by_context: - self._members_by_context[heap_context] = [] - self._members_by_context[heap_context].append(record_addr) - # out - self._signatures = {} - self._similarities = [] - - def _iterate_contexts(self): - for c in self._members_by_context.keys(): - yield c - - def _iterate_records(self, _context): - for item_addr in self._members_by_context[_context]: - yield _context.get_record_for_address(item_addr) - - def reverse_record(self, _context, _record): - record_signature = _record.get_signature_text() - if record_signature not in self._signatures: - self._signatures[record_signature] = [] - self._signatures[record_signature].append(_record.address) - - def calculate(self): - # - res = [(len(v), k) for k,v in self._signatures.items()] - res.sort(reverse=True) - total = len(self._members) - best_count = res[0][0] - best_sig = res[0][1] - best_addr = self._signatures[best_sig][0] - log.debug('best match %d/%d is %s: 0x%x', best_count, total, best_sig, best_addr) - return best_sig, best_addr - - -# TODO a Group maker based on field pointer memorymappings and structure -# instance/sizes... - - -class SignatureGroupMaker: - """ - From a list of addresses, groups similar signature together. - HINT: structure should be resolved but not reverse-patternised for arrays...?? - """ - - def __init__(self, context, name, addrs): - self._name = name - self._structures_addresses = addrs - self._context = context - - def _init_signatures(self): - # get text signature for Counter to parse - # need to force resolve of allocators - self._signatures = [] - decoder = dsa.FieldReverser(self._context.memory_handler) - for addr in map(long, self._structures_addresses): - # decode the fields - record = self._context.get_record_for_address(addr) - ## record.decodeFields() # can be long - decoder.analyze_fields(record) - # get the signature for the record - self._signatures.append((addr, self._context.get_record_for_address(addr).get_signature_text())) - return - - def make(self): - self._init_signatures() - # - self._similarities = [] - for i, x1 in enumerate(self._signatures[:-1]): - for x2 in self._signatures[i + 1:]: - addr1, el1 = x1 - addr2, el2 = x2 - lev = Levenshtein.ratio(el1, el2) # seqmatcher ? - if lev > 0.75: - #self._similarities.append( ((addr1,el1),(addr2,el2)) ) - self._similarities.append((addr1, addr2)) - # we do not need the signature. 
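
# [Editor's sketch - not part of the diff] CommonTypeReverser.calculate above,
# in miniature: tally signature texts over the member records and keep the
# signature with the most votes, plus one address carrying it.
import collections

member_signatures = {0x10: 'PPI', 0x20: 'PPI', 0x30: 'PII', 0x40: 'PPI'}
votes = collections.Counter(member_signatures.values())
best_sig, best_count = votes.most_common(1)[0]
best_addr = next(a for a, s in member_signatures.items() if s == best_sig)
assert (best_sig, best_count, best_addr) == ('PPI', 3, 0x10)
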
- # check for chains - # TODO we need a group maker with an iterator to push group - # proposition to the user - log.debug('\t[-] Signatures done.') - return - - def persist(self): - outdir = config.get_cache_filename( - config.CACHE_SIGNATURE_GROUPS_DIR, - self._context.dumpname) - config.create_cache_folder(outdir) - # - outname = os.path.sep.join([outdir, self._name]) - ar = utils.int_array_save(outname, self._similarities) - return - - def isPersisted(self): - outdir = config.get_cache_filename( - config.CACHE_SIGNATURE_GROUPS_DIR, - self._context.dumpname) - return os.access(os.path.sep.join([outdir, self._name]), os.F_OK) - - def load(self): - outdir = config.get_cache_filename( - config.CACHE_SIGNATURE_GROUPS_DIR, - self._context.dumpname) - inname = os.path.sep.join([outdir, self._name]) - self._similarities = utils.int_array_cache(inname) - return - - def getGroups(self): - return self._similarities - - -class StructureSizeCache: - - """Loads allocators, get their signature (and size) and sort them in - fast files dictionaries.""" - - def __init__(self, ctx): - self._context = ctx - self._sizes = None - - def _loadCache(self): - outdir = config.get_cache_filename( - config.CACHE_SIGNATURE_SIZES_DIR, - self._context.dumpname) - fdone = os.path.sep.join( - [outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]) - if not os.access(fdone, os.R_OK): - return False - for myfile in os.listdir(outdir): - try: - # FIXME: not sure its - - # and what that section is about in general. - addr = int(myfile.split('-')[1], 16) - except IndexError as e: - continue # ignore file - - def cacheSizes(self): - """Find the number of different sizes, and creates that much numpyarray""" - # if not os.access - outdir = config.get_cache_filename( - config.CACHE_SIGNATURE_SIZES_DIR, - self._context.dumpname) - config.create_cache_folder(outdir) - # - sizes = map(int, set(self._context._malloc_sizes)) - arrays = dict([(s, []) for s in sizes]) - # sort all addr in all sizes.. - [arrays[self._context._malloc_sizes[i]].append( - long(addr)) for i, addr in enumerate(self._context._malloc_addresses)] - # saving all sizes dictionary in files... - for size, lst in arrays.items(): - fout = os.path.sep.join([outdir, 'size.%0.4x' % size]) - arrays[size] = utils.int_array_save(fout, lst) - # saved all sizes dictionaries. - # tag it as done - open( - os.path.sep.join([outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') - self._sizes = arrays - return - - def getStructuresOfSize(self, size): - if self._sizes is None: - self.cacheSizes() - if size not in self._sizes: - return [] - return numpy.asarray(self._sizes[size]) - - def __iter__(self): - if self._sizes is None: - self.cacheSizes() - for size in self._sizes.keys(): - yield (size, numpy.asarray(self._sizes[size])) - - -class SignatureMaker(searchers.AbstractSearcher): - """ - make a condensed signature of the mapping. 
- We could then search the signature file for a specific signature - """ - - NULL = 0x1 - POINTER = 0x2 - # POINTERS = NULL | POINTER # null can be a pointer value so we can - # byte-test that - OTHER = 0x4 - - def __init__(self, mapping): - searchers.AbstractSearcher.__init__(self, mapping) - self.pSearch = haystack.reverse.matchers.PointerSearcher(self.get_search_mapping()) - self.nSearch = haystack.reverse.matchers.NullSearcher(self.get_search_mapping()) - - def test_match(self, vaddr): - ''' return either NULL, POINTER or OTHER ''' - if self.nSearch.test_match(vaddr): - return self.NULL - if self.pSearch.test_match(vaddr): - return self.POINTER - return self.OTHER - - def search(self): - ''' returns the memspace signature. Dont forget to del that object, it's big. ''' - self._values = b'' - log.debug( - 'search %s mapping for matching values' % - (self.get_search_mapping())) - for vaddr in xrange( - self.get_search_mapping().start, self.get_search_mapping().end, self.WORDSIZE): - self._check_steps(vaddr) # be verbose - self._values += struct.pack('B', self.test_match(vaddr)) - return self._values - - def __iter__(self): - ''' Iterate over the mapping to return the signature of that memspace ''' - log.debug( - 'iterate %s mapping for matching values' % - (self.get_search_mapping())) - for vaddr in xrange( - self.get_search_mapping().start, self.get_search_mapping().end, self.WORDSIZE): - self._check_steps(vaddr) # be verbose - yield struct.pack('B', self.test_match(vaddr)) - return - - -class PointerSignatureMaker(SignatureMaker): - - def test_match(self, vaddr): - ''' return either POINTER or OTHER ''' - if self.pSearch.test_match(vaddr): - return self.POINTER - return self.OTHER - - -class RegexpSearcher(searchers.AbstractSearcher): - - ''' - Search by regular expression in memspace. - ''' - - def __init__(self, mapping, regexp): - searchers.AbstractSearcher.__init__(self, mapping) - self.regexp = regexp - self.pattern = re.compile(regexp, re.IGNORECASE) - - def search(self): - ''' find all valid matches offsets in the memory space ''' - self._values = set() - log.debug( - 'search %s mapping for matching values %s' % - (self.get_search_mapping(), self.regexp)) - for match in self.get_search_mapping().finditer( - self.get_search_mapping().mmap().get_byte_buffer()): - offset = match.start() - # FIXME, TU what is value for? - value = match.group(0) - if isinstance(value, list): - value = ''.join([chr(x) for x in match.group()]) - vaddr = offset + self.get_search_mapping().start - self._check_steps(vaddr) # be verbose - self._values.add((vaddr, value)) - return self._values - - def __iter__(self): - ''' Iterate over the mapping to find all valid matches ''' - log.debug( - 'iterate %s mapping for matching values' % - (self.get_search_mapping())) - for match in self.pattern.finditer( - self.get_search_mapping().mmap().get_byte_buffer()): - offset = match.start() - value = match.group(0) # [] of int ? 
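
# [Editor's sketch - not part of the diff] The word classification behind
# SignatureMaker.test_match/search above, over a fake word list instead of a
# memory mapping: every aligned word becomes one signature byte.
NULL, POINTER, OTHER = 0x1, 0x2, 0x4
valid_addresses = {0x2000, 0x3000}

def classify(word):
    if word == 0:
        return NULL
    if word in valid_addresses:
        return POINTER
    return OTHER

words = [0x0, 0x2000, 0x41414141, 0x3000]
signature = bytes(classify(w) for w in words)
assert signature == b'\x01\x02\x04\x02'
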
- if isinstance(value, list): - value = ''.join([chr(x) for x in match.group()]) - vaddr = offset + self.get_search_mapping().start - self._check_steps(vaddr) # be verbose - yield (vaddr, value) - return - - def test_match(self, vaddr): - return True - -#EmailRegexp = r'''[a-zA-Z0-9+_\-\.]+@[0-9a-zA-Z][.-0-9a-zA-Z]*.[a-zA-Z]+''' -EmailRegexp = r'''((\"[^\"\f\n\r\t\v\b]+\")|([\w\!\#\$\%\&\'\*\+\-\~\/\^\`\|\{\}]+(\.[\w\!\#\$\%\&\'\*\+\-\~\/\^\`\|\{\}]+)*))@((\[(((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9])))\])|(((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9]))\.((25[0-5])|(2[0-4][0-9])|([0-1]?[0-9]?[0-9])))|((([A-Za-z0-9\-])+\.)+[A-Za-z\-]+))''' -URLRegexp = r'''[a-zA-Z0-9]+://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+''' -# URIRegexp = -# r'''#^([a-z0-9+\-.]+):([/]{0,2}([a-z0-9\-._~%!\$&'\(\)\*+,;=:]+@)?([\[\]a-z0-9\-._~%!\$&'\(\)\*+,;=:]+(:[0-9]+)?))([a-z0-9\-._~%!\$&'\(\)\*+,;=:@/]*)(\?[\?/a-z0-9\-._~%!\$&'\(\)\*+,;=:@]+)?(\#[a-z0-9\-._~%!\$&'\(\)\*+,;=:@/\?]+)?#i''' -WinFileRegexp = r'''([a-zA-Z]\:)(\\[^\\/:*?<>"|]*(?|]+([ ]+[^ \\/:*?""<>|]+)*)*\\?''' -#UNCRegexp = r'(([a-zA-Z]:|\\)\\)?(((\.)|(\.\.)|([^\\/:\*\?"\|<>\. ](([^\\/:\*\?"\|<>\. ])|([^\\/:\*\?"\|<>]*[^\\/:\*\?"\|<>\. ]))?))\\)*[^\\/:\*\?"\|<>\. ](([^\\/:\*\?"\|<>\. ])|([^\\/:\*\?"\|<>]*[^\\/:\*\?"\|<>\. ]))?' - - -def looksLikeUTF8(bytearray): - p = re.compile("\\A(\n" + - r" [\\x09\\x0A\\x0D\\x20-\\x7E] # ASCII\\n" + - r"| [\\xC2-\\xDF][\\x80-\\xBF] # non-overlong 2-byte\n" + - r"| \\xE0[\\xA0-\\xBF][\\x80-\\xBF] # excluding overlongs\n" + - r"| [\\xE1-\\xEC\\xEE\\xEF][\\x80-\\xBF]{2} # straight 3-byte\n" + - r"| \\xED[\\x80-\\x9F][\\x80-\\xBF] # excluding surrogates\n" + - r"| \\xF0[\\x90-\\xBF][\\x80-\\xBF]{2} # planes 1-3\n" + - r"| [\\xF1-\\xF3][\\x80-\\xBF]{3} # planes 4-15\n" + - r"| \\xF4[\\x80-\\x8F][\\x80-\\xBF]{2} # plane 16\n" + - r")*\\z", re.VERBOSE) - - phonyString = bytearray.encode("ISO-8859-1") - return p.matcher(phonyString).matches() - -''' -lib["email"] = re.compile(r"(?:^|\s)[-a-z0-9_.]+@(?:[-a-z0-9]+\.)+[a-z]{2,6}(?:\s|$)",re.IGNORECASE) -lib["postcode"] = re.compile("[a-z]{1,2}\d{1,2}[a-z]?\s*\d[a-z]{2}",re.IGNORECASE) -lib["zipcode"] = re.compile("\d{5}(?:[-\s]\d{4})?") -lib["ukdate"] = re.compile \ -("[0123]?\d[-/\s\.](?:[01]\d|[a-z]{3,})[-/\s\.](?:\d{2})?\d{2}",re.IGNORECASE) -lib["time"] = re.compile("\d{1,2}:\d{1,2}(?:\s*[aApP]\.?[mM]\.?)?") -lib["fullurl"] = re.compile("https?://[-a-z0-9\.]{4,}(?::\d+)?/[^#?]+(?:#\S+)?",re.IGNORECASE) -lib["visacard"] = re.compile("4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}") -lib["mastercard"] = re.compile("5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}") -lib["phone"] = re.compile("0[-\d\s]{10,}") -lib["ninumber"] = re.compile("[a-z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[a-z]",re.IGNORECASE) -lib["isbn"] = re.compile("(?:[\d]-?){9}[\dxX]") - ''' - - -def makeSizeCaches(dumpname): - ''' gets all allocators instances from the dump, order them by size.''' - from haystack.reverse import context - log.debug('\t[-] Loading the context for a dumpname.') - ctx = context.get_context(dumpname) - log.debug('\t[-] Make the size dictionnaries.') - sizeCache = StructureSizeCache(ctx) - sizeCache.cacheSizes() - - return ctx, sizeCache - - -def buildStructureGroup(context, sizeCache, optsize=None): - ''' Iterate of structure instances 
grouped by size, find similar signatures, - and outputs a list of groups of similar allocators instances.''' - log.debug("\t[-] Group allocators's signatures by sizes.") - sgms = [] - # - for size, lst in sizeCache: - if optsize is not None: - if size != optsize: - continue # ignore different size - log.debug("\t[-] Group signatures for allocators of size %d" % size) - sgm = SignatureGroupMaker(context, 'structs.%x' % size, lst) - if sgm.isPersisted(): - sgm.load() - else: - sgm.make() - sgm.persist() - sgms.append(sgm) - - # TODO DEBUG - # if len(lst) >100: - # log.error('too big a list, DELETE THIS ') - # continue - # #return - - # make a chain and use --originAddr - log.debug( - '\t[-] Sort %d structs of size %d in groups' % - (len(lst), size)) - graph = networkx.Graph() - # add similarities as linked structs - graph.add_edges_from(sgm.getGroups()) - # add all structs all nodes . Should spwan isolated graphs - graph.add_nodes_from(lst) - subgraphs = networkx.algorithms.components.connected.connected_component_subgraphs( - graph) - # print 'subgraphs', len(subgraphs) - chains = [g.nodes() for g in subgraphs] - # TODO, do not forget this does only gives out structs with similarities. - # lonely structs are not printed here... - yield chains - - -def printStructureGroups(context, chains, originAddr=None): - chains.sort() - decoder = dsa.FieldReverser(context.memory_handler) - for chain in chains: - log.debug('\t[-] chain len:%d' % len(chain)) - if originAddr is not None: - if originAddr not in chain: - continue # ignore chain if originAddr is not in it - for addr in map(long, chain): - record = context.get_record_for_address(addr) - ##record.decodeFields() # can be long - decoder.analyze_fields(record) - print(context.get_record_for_address(addr).to_string()) - print('#', '-' * 78) - - -def graphStructureGroups(context, chains, originAddr=None): - # TODO change generic fn - chains.sort() - decoder = dsa.FieldReverser(context.memory_handler) - graph = networkx.DiGraph() - for chain in chains: - log.debug('\t[-] chain len:%d' % len(chain)) - if originAddr is not None: - if originAddr not in chain: - continue # ignore chain if originAddr is not in it - for addr in map(long, chain): - record = context.get_record_for_address(addr) - ## record.decodeFields() # can be long - decoder.analyze_fields(record) - print(context.get_record_for_address(addr).to_string()) - targets = set() - _record = context.get_record_for_address(addr) - pointer_fields = [f for f in _record.get_fields() if f.is_pointer()] - for f in pointer_fields: - addr_child = f.get_value_for_field(_record) - child = context.get_record_at_address(addr) - targets.add(('%x' % addr, '%x' % child.address)) - graph.add_edges_from(targets) - print('#', '-' * 78) - networkx.readwrite.gexf.write_gexf( - graph, - config.get_cache_filename( - config.CACHE_GRAPH, - context.dumpname)) - - - -# FIXME ongoing TypeReverser -# TODO next next step, compare struct links in a DiGraph with node == -# struct size + pointer index as a field. -def makeReversedTypes(heap_context, sizeCache): - ''' Compare signatures for each size groups. - Makes a chains out of similar allocators. Changes the structure names for a single - typename when possible. 
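
# [Editor's sketch - not part of the diff] How buildStructureGroup above turns
# similarity pairs into groups: an edge means 'these two records look alike'
# and each connected component is a proposed record type. Note that
# connected_component_subgraphs(), used above, was removed in networkx 2.4;
# connected_components() is the long-lived equivalent.
import networkx

addresses = [0x10, 0x20, 0x30, 0x40, 0x50]
similar_pairs = [(0x10, 0x20), (0x20, 0x30)]
graph = networkx.Graph()
graph.add_edges_from(similar_pairs)
graph.add_nodes_from(addresses)  # lone records become singleton groups
chains = sorted(sorted(c) for c in networkx.connected_components(graph))
assert chains == [[0x10, 0x20, 0x30], [0x40], [0x50]]
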
Changes the ctypes types of each pointer field.''' - - log.info( - '[+] Build groups of similar instances, create a reversed type for each group.') - for chains in buildStructureGroup(heap_context, sizeCache): - fixType(heap_context, chains) - - log.info('[+] For each instances, fix pointers fields to newly created types.') - decoder = dsa.FieldReverser(heap_context.memory_handler) - for s in heap_context.listStructures(): - s.reset() - ## s.decodeFields() - decoder.reverse_record(heap_context, s) - pointer_fields = [f for f in s.get_fields() if f.is_pointer()] - for f in pointer_fields: - addr = f.get_value_for_field(s) - if addr in heap_context.heap: - try: - ctypes_type = heap_context.get_record_at_address( - addr).get_ctype() - # we have escapees, withouth a typed type... saved them from - # exception - except TypeError as e: - ctypes_type = fixInstanceType( - heap_context, - heap_context.get_record_at_address(addr), - getname()) - #f.setCtype(ctypes.POINTER(ctypes_type)) - f.set_pointee_ctype(ctypes.POINTER(ctypes_type)) - f.set_comment('pointer fixed') - - log.info('[+] For new reversed type, fix their definitive fields.') - for revStructType in heap_context.list_reversed_types(): - revStructType.makeFields(heap_context) - - # poitners not in the heap - # for s in context.listStructures(): - # for f in s.getPointerFields(): - # if ctypes.is_void_pointer_type(f.getCtype()): - # print s,'has a c_void_p field', f._getValue(0), - # print context.getStructureForOffset( f._getValue(0) ) - - return heap_context - - -def makeSignatures(dumpname): - from haystack.reverse import context - log.debug('\t[-] Loading the context for a dumpname.') - ctx = context.get_context(dumpname) - heap = ctx.heap - - log.info('[+] Make the signatures.') - sigMaker = SignatureMaker(heap) - sig = sigMaker.search() - return ctx, sig - - -def makeGroupSignature(context, sizeCache): - ''' From the allocators cache ordered by size, group similar instances together. 
''' - log.info("[+] Group allocators's signatures by sizes.") - sgms = [] - try: - for size, lst in sizeCache: - log.debug( - "[+] Group signatures for allocators of size %d" % - size) - sgm = SignatureGroupMaker(context, 'structs.%x' % size, lst) - sgm.make() - sgm.persist() - sgms.append(sgm) - except KeyboardInterrupt as e: - pass - return context, sgms - -# FIXME: 100 maybe is a bit short -try: - import pkgutil - _words = pkgutil.get_data(__name__, config.WORDS_FOR_REVERSE_TYPES_FILE) -except ImportError: - import pkg_resources - _words = pkg_resources.resource_string( - __name__, - config.WORDS_FOR_REVERSE_TYPES_FILE) - -# global -_NAMES = [s.strip() for s in _words.split(b'\n')[:-1]] -_NAMES_plen = 1 - - -def getname(): - global _NAMES, _NAMES_plen - if len(_NAMES) == 0: - _NAMES_plen += 1 - _NAMES = [ - ''.join(x) for x in itertools.permutations( - _words.split('\n')[ - :- - 1], - _NAMES_plen)] - return _NAMES.pop() - - -def fixType(context, chains): - ''' Fix the name of each structure to a generic word/type name ''' - for chain in chains: - name = getname() - log.debug( - '\t[-] fix type of chain size:%d with name name:%s' % - (len(chain), name)) - for addr in chain: # chain is a numpy - addr = int(addr) - # FIXME - instance = context.get_record_for_address(addr) - # - ctypes_type = fixInstanceType(context, instance, name) - return - - -def fixInstanceType(context, instance, name): - # TODO if instance.isFixed, return instance.getCtype() - instance.set_name(name) - ctypes_type = context.get_reversed_type(name) - if ctypes_type is None: # make type - ctypes_type = structure.ReversedType.create(context, name) - ctypes_type.addInstance(instance) - instance.set_ctype(ctypes_type) - return ctypes_type - - -if __name__ == '__main__': - pass diff --git a/haystack/reverse/lrucache.py b/haystack/reverse/lrucache.py deleted file mode 100644 index 5dc87d7b..00000000 --- a/haystack/reverse/lrucache.py +++ /dev/null @@ -1,221 +0,0 @@ -# lrucache.py -- a simple LRU (Least-Recently-Used) cache class - -# Copyright 2004 Evan Prodromou -# Licensed under the Academic Free License 2.1 - -# arch-tag: LRU cache main module - -# haystack-comment: used in structure.py:CacheWrapper - -from __future__ import print_function, generators - -"""a simple LRU (Least-Recently-Used) cache module - -This module provides very simple LRU (Least-Recently-Used) cache -functionality. - -An *in-memory cache* is useful for storing the results of an -'expensive' process (one that takes a lot of time or resources) for -later re-use. Typical examples are accessing data from the filesystem, -a database, or a network location. If you know you'll need to re-read -the data again, it can help to keep it in a cache. - -You *can* use a Python dictionary as a cache for some purposes. -However, if the results you're caching are large, or you have a lot of -possible results, this can be impractical memory-wise. - -An *LRU cache*, on the other hand, only keeps _some_ of the results in -memory, which keeps you from overusing resources. The cache is bounded -by a maximum size; if you try to add more values to the cache, it will -automatically discard the values that you haven't read or written to -in the longest time. In other words, the least-recently-used items are -discarded. [1]_ - -.. [1]: 'Discarded' here means 'removed from the cache'. 
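
# [Editor's sketch - not part of the diff] The naming scheme of getname() in
# the deleted signature.py above: hand out words from a list and, when it runs
# dry, restock it with joined permutations of increasing length. The three toy
# words stand in for the WORDS_FOR_REVERSE_TYPES_FILE wordlist.
import itertools

words = ['ab', 'cd', 'ef']
names = list(words)
plen = 1

def next_name():
    global names, plen
    if not names:
        plen += 1
        names = [''.join(x) for x in itertools.permutations(words, plen)]
    return names.pop()

assert [next_name() for _ in range(4)] == ['ef', 'cd', 'ab', 'efcd']
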
- -""" - -import time -from heapq import heappush, heappop, heapify - -__version__ = "0.2" -__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE'] -__docformat__ = 'reStructuredText en' - -DEFAULT_SIZE = 16 -"""Default size of a new LRUCache object, if no 'size' argument is given.""" - - -class CacheKeyError(KeyError): - - """Error raised when cache requests fail - - When a cache record is accessed which no longer exists (or never did), - this error is raised. To avoid it, you may want to check for the existence - of a cache record before reading or deleting it.""" - pass - - -class LRUCache(object): - - """Least-Recently-Used (LRU) cache. - - Instances of this class provide a least-recently-used (LRU) cache. They - emulate a Python mapping type. You can use an LRU cache more or less like - a Python dictionary, with the exception that objects you put into the - cache may be discarded before you take them out. - - Some example usage:: - - cache = LRUCache(32) # new cache - cache['foo'] = get_file_contents('foo') # or whatever - - if 'foo' in cache: # if it's still in cache... - # use cached version - contents = cache['foo'] - else: - # recalculate - contents = get_file_contents('foo') - # store in cache for next time - cache['foo'] = contents - - print cache.size # Maximum size - - print len(cache) # 0 <= len(cache) <= cache.size - - cache.size = 10 # Auto-shrink on size assignment - - for i in range(50): # note: larger than cache size - cache[i] = i - - if 0 not in cache: print 'Zero was discarded.' - - if 42 in cache: - del cache[42] # Manual deletion - - for j in cache: # iterate (in LRU order) - print j, cache[j] # iterator produces keys, not values - """ - - class __Node(object): - - """Record of a cached value. Not for public consumption.""" - - def __init__(self, key, obj, timestamp): - object.__init__(self) - self.key = key - self.obj = obj - self.atime = timestamp - self.mtime = self.atime - - def __lt__(self, other): - return self.atime < other.atime - - def __repr__(self): - return "<%s %s => %s (%s)>" % \ - (self.__class__, self.key, self.obj, - time.asctime(time.localtime(self.atime))) - - def __init__(self, size=DEFAULT_SIZE): - # Check arguments - if size <= 0: - raise ValueError(size) - elif not isinstance(size, type(0)): - raise TypeError(size) - object.__init__(self) - self.__heap = [] - self.__dict = {} - self.size = size - """Maximum size of the cache. 
- If more than 'size' elements are added to the cache, - the least-recently-used ones will be discarded.""" - - def __len__(self): - return len(self.__heap) - - def __contains__(self, key): - return key in self.__dict - - def __setitem__(self, key, obj): - if key in self.__dict: - node = self.__dict[key] - node.obj = obj - node.atime = time.time() - node.mtime = node.atime - heapify(self.__heap) - else: - # size may have been reset, so we loop - while len(self.__heap) >= self.size: - lru = heappop(self.__heap) - del self.__dict[lru.key] - node = self.__Node(key, obj, time.time()) - self.__dict[key] = node - heappush(self.__heap, node) - - def __getitem__(self, key): - if key not in self.__dict: - raise CacheKeyError(key) - else: - node = self.__dict[key] - node.atime = time.time() - heapify(self.__heap) - return node.obj - - def __delitem__(self, key): - if key not in self.__dict: - raise CacheKeyError(key) - else: - node = self.__dict[key] - del self.__dict[key] - self.__heap.remove(node) - heapify(self.__heap) - return node.obj - - def __iter__(self): - copy = self.__heap[:] - while len(copy) > 0: - node = heappop(copy) - yield node.key - raise StopIteration - - def __setattr__(self, name, value): - object.__setattr__(self, name, value) - # automagically shrink heap on resize - if name == 'size': - while len(self.__heap) > value: - lru = heappop(self.__heap) - del self.__dict[lru.key] - - def __repr__(self): - return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap)) - - def mtime(self, key): - """Return the last modification time for the cache record with key. - May be useful for cache instances where the stored values can get - 'stale', such as caching file or network resource contents.""" - if key not in self.__dict: - raise CacheKeyError(key) - else: - node = self.__dict[key] - return node.mtime - -if __name__ == "__main__": - cache = LRUCache(25) - print(cache) - for i in range(50): - cache[i] = str(i) - print(cache) - if 46 in cache: - del cache[46] - print(cache) - cache.size = 10 - print(cache) - cache[46] = '46' - print(cache) - print(len(cache)) - for c in cache: - print(c) - print(cache) - print(cache.mtime(46)) - for c in cache: - print(c) diff --git a/haystack/reverse/matchers.py b/haystack/reverse/matchers.py deleted file mode 100644 index a9927729..00000000 --- a/haystack/reverse/matchers.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - - -class AbstractMatcher(object): - """ - Search for pointers by checking if the word value is a valid addresses in memspace. - """ - def __init__(self, memory_handler): - self._memory_handler = memory_handler - - def is_valid_address_value(self, vaddr): - return self._memory_handler.is_valid_address_value(vaddr) - - def test_match(self, mapping, vaddr): - """ - Test function to implement by the class - mapping: IMemoryMapping - vaddr: long - - returns: bool - """ - raise NotImplementedError - - -class AbstractMatcherWithValue(object): - """ - Search for pointers by checking if the word value is a valid addresses in memspace. 
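
# [Editor's sketch - not part of the diff] The contract AbstractMatcher
# defines above and PointerSearcher/NullSearcher implement below, against a
# fake packed buffer instead of an IMemoryMapping: read the word at an aligned
# offset and test whether it is a valid address, or null.
import struct

memory = struct.pack('<4Q', 0x0, 0x2000, 0xdeadbeef, 0x2000)
valid_addresses = {0x2000}

def read_word(offset):
    return struct.unpack_from('<Q', memory, offset)[0]

pointer_hits = [off for off in range(0, len(memory), 8)
                if read_word(off) in valid_addresses]
null_hits = [off for off in range(0, len(memory), 8) if read_word(off) == 0]
assert pointer_hits == [8, 24] and null_hits == [0]
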
- """ - def __init__(self, memory_handler): - self._memory_handler = memory_handler - - def is_valid_address_value(self, vaddr): - return self._memory_handler.is_valid_address_value(vaddr) - - def test_match(self, mapping, vaddr): - """ - Test function to implement by the class - mapping: IMemoryMapping - vaddr: long - - returns: (bool, value) or (False, None) if not matched - """ - raise NotImplementedError - - -class PointerSearcher(AbstractMatcher): - """ - Search for pointers by checking if the word value is a valid addresses in memspace. - """ - def test_match(self, mapping, vaddr): - try: - word = mapping.read_word(vaddr) - except ValueError as e: - mapping = self._memory_handler.get_mapping_for_address(vaddr) - word = mapping.read_word(vaddr) - if self.is_valid_address_value(word): - return True - return False - - -class NullSearcher(AbstractMatcher): - """ - Search for Nulls words in memspace. - """ - def test_match(self, mapping, vaddr): - try: - word = mapping.read_word(vaddr) - except ValueError as e: - # we fetch the proper mapping - mapping = self._memory_handler.get_mapping_for_address(vaddr) - word = mapping.read_word(vaddr) - if word == 0: - return True - return False - - -class PointerEnumerator(AbstractMatcherWithValue): - """ - Search for pointers by checking if the word value is a valid addresses in memspace. - return the value of the pointer. - """ - def test_match(self, mapping, vaddr): - try: - word = mapping.read_word(vaddr) - except ValueError as e: - # we fetch the proper mapping - mapping = self._memory_handler.get_mapping_for_address(vaddr) - word = mapping.read_word(vaddr) - if self.is_valid_address_value(word): - return True, word - return False, None diff --git a/haystack/reverse/pattern.py b/haystack/reverse/pattern.py deleted file mode 100644 index 41d87cfc..00000000 --- a/haystack/reverse/pattern.py +++ /dev/null @@ -1,1331 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Build pattern out of signatures. -List of tools designed to create signatures for allocations. -That should allow to do reverse guesswork of patterns (pointers) -and therefore identify similar record types allocations. -""" - -import logging -import argparse -import pickle -import sys -import struct -import itertools -import collections -import numbers -import os -from future.builtins import range - -from haystack import dump_loader -from haystack.reverse import config -from haystack.reverse import utils -from haystack.reverse import matchers -from haystack.reverse import searchers - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__status__ = "Production" - - -log = logging.getLogger('pattern') - - -class Dummy(object): - pass - - -def findPatternText(sequence, elSize=1, minNbGroup=2): - """ - returns a regexp grouping repetitive patterns. - - @param sequence: a sequence (str/bstr) with rfind() method. - @param elsize: the size of each element ( 1 to xxx ) in the sequence. - @param minNbGroup: the minimum number of repetition before trying to group the pattern. 
- - Examples: - >>> from haystack.reverse import pattern - >>> s = 'aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy' - >>> pattern.findPatternText(s,1) - ' (a){5} (1){4} (b){3} (c){2} d (a2){5} (b1c){4} (abcd){4} p (o){2} (i){3} (u){4} (y){5} ' - - >>> s = 'aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy' - >>> pattern.findPatternText(s,1,5) - ' (a){5} 1111bbbccd (a2){5} b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuu (y){5} ' - """ - ret = findPattern(sequence, elSize, minNbGroup) - s = '' - for nb, txt in ret: - if nb == 1: - s += txt - else: - s += ' (%s){%d} ' % (txt, nb) - return s - - -def findPattern(sequence, elSize=1, minNbGroup=2): - """ - returns a regexp grouping repetitive patterns. - - @param sequence: a sequence (str/bstr) with rfind() method. - @param elsize: the size of each element ( 1 to xxx ) in the sequence. - @param minNbGroup: the minimum number of repetition before trying to group the pattern. - - Examples: - >>> from haystack.reverse import pattern - >>> s = 'aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy' - >>> pattern.findPattern(s,1) - [(5, 'a'), (4, '1'), (3, 'b'), (2, 'c'), (1, 'd'), (5, 'a2'), (4, 'b1c'), (4, 'abcd'), (1, 'p'), (2, 'o'), (3, 'i'), (4, 'u'), (5, 'y')] - - >>> s = 'aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy' - >>> pattern.findPattern(s,1,5) - [(5, 'a'), (1, '1111bbbccd'), (5, 'a2'), (1, 'b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuu'), (5, 'y')] - - """ - if (len(sequence) % elSize) != 0: - pass # DEBUG TODO DELETE bypass needed for textprintout - #raise ValueError('your sequence length:%d has to be a multiple of element size:%d'%(len(sequence),elSize)) - elif sequence == '': - return [] - - patterns = [] - for seqlen in range(elSize, 1 + (len(sequence) // 2)): - seqs = [ - sequence[ - i:i + - seqlen] for i in range( - 0, - len(sequence) - - seqlen + - 1, - elSize)] # i %elSize, aligned on the elSize - for value, nb in collections.Counter(seqs).most_common(): - # try repetition as long as it is > to minNbGroup - while nb >= minNbGroup: - ind = sequence.rfind(value * nb) # find the fulltext pattern - while ind != -1: # not found - patterns.append( - (nb * - len(value), - ind, - nb, - value)) # biggest is best, ind++ is better, large nb best - ind = sequence.rfind( - value * - nb, - 0, - ind) # find it at another offset - nb -= 1 # try with a smaller number of repetition - # - if len(patterns) == 0: - return [(1, sequence)] - - patterns = sorted(set(patterns)) - best = patterns[-1] # higher wins - - # print 'BEST:', best, best[0], best[3][:elSize], best[3][elSize:] - # print 'found new patterns :' - # for p in patterns: - # sequence2 = sequence.replace( p[3]*p[2], ' (%s){%d} '%(p[3],p[2]) ) - # print p, sequence2 - - i = sequence.find(best[3] * best[2]) - left = sequence[:i] - right = sequence[i + best[0]:] - log.debug('left %d:%s' % (len(left), left)) - log.debug('right %d:%s' % (len(right), right)) - ret = findPattern(left, elSize, minNbGroup) - ret2 = findPattern(right, elSize, minNbGroup) - return ret + [(best[2], best[3])] + ret2 - - -class PatternEncoder: - - def __init__(self, sequence, minGroupSize): - self.basicElements = set(sequence) - self.sequence = sequence - self.nb = len(self.basicElements) - self.minGroupSize = minGroupSize - if self.nb == 0: - raise ValueError('empty sequence') - elif self.nb < 0xff: - self.elSize = 1 - elif self.nb < 0xffff: - self.elSize = 2 - elif self.nb < 0xffffff: - self.elSize = 3 - elif self.nb < 0xffffffff: - self.elSize = 4 - 
else:
- raise ValueError(
- 'I deny you the right to find patterns for more than 2^32 different basic elements.')
- self._makeDictionnary()
- return
-
- def _makeDictionnary(self):
- log.debug('making pattern dictionary')
- self.dict = {}
- self.dict_reverse = {}
- for i, el in enumerate(self.basicElements):
- cod = struct.pack('>L', i)[-self.elSize:] # code 0 to 0xff
- self.dict[el] = cod
- self.dict_reverse[cod] = el
- # dict done
- self.sequence_norm = [self.dict[el] for el in self.sequence]
- self.sequence_text = b''.join(self.sequence_norm)
- log.debug('done making pattern dictionary %d' % self.elSize)
- return
-
- def makePattern(self):
- '''[(5, 'a'), (4, '1'), (3, 'b'), (2, 'c'), (1, 'd'), (5, 'a2'), (4, 'b1c'), .. '''
- # as of today, no sequence class other than string supports rfind,
- # so we have to translate the string output of findPattern back into basic elements
- ret = []
- patterns = findPattern(
- self.sequence_text,
- self.elSize,
- self.minGroupSize)
- for nb, p in patterns:
- plen = len(p)
- if plen % self.elSize != 0:
- raise ValueError('serious bug in findPattern')
- elif nb == 1:
- for i in range(0, plen, self.elSize):
- ret.append((nb, self.dict_reverse[p[i:i + self.elSize]]))
- else:
- seq = [self.dict_reverse[p[i:i + self.elSize]]
- for i in range(0, plen, self.elSize)]
- ret.append((nb, seq))
-
- return ret
-
-
-def make(opts):
- log.info('Make the signature.')
- # head + first word size
- memory_handler = dump_loader.load(opts.dumpfiles[0])
- word_size = memory_handler.get_target_platform().get_word_size()
- ppMapper = PinnedPointersMapper(word_size)
- heap_sig = PointerIntervalSignature(memory_handler, '[heap]')
- log.info('pinning offset list created for heap %s.' % heap_sig)
- ppMapper.addSignature(heap_sig)
- # now do the others
- for dumpfile in opts.dumpfiles[1:]:
- memory_handler = dump_loader.load(dumpfile)
- if memory_handler.get_target_platform().get_word_size() != word_size:
- log.error("Differing word size between samples")
- heap_sig = PointerIntervalSignature(memory_handler, '[heap]')
- log.info('pinning offset list created for heap %s.' % heap_sig)
- ppMapper.addSignature(heap_sig)
-
- log.info('Find similar vectors between pointers on all signatures.')
- ppMapper.run()
-
- # we have :
- # resolved PinnedPointers on all sigs in ppMapper.resolved
- # unresolved PP in ppMapper.unresolved
-
- # next step
- log.info('Pin resolved PinnedPointers to their respective heap.')
-
-
-class PointerIntervalSignature:
-
- '''
- Wraps the list of intervals between pointers identified in the dumpfile.
- When the memory is:
- P....P..P.PPP.PP.PPPP.PPP.P..P..................P
- where P is a 4-byte word whose value could be a pointer value,
- the signature is
- [20,12,8,4,4,8,4,8,4,4,4,8,4,4,8,12,80]
-
- It abstracts the memory contents to its signature.
- '''
-
- def __init__(self, memory_handler, pathname='[heap]'):
- self.mmap = None
- self.mmap_pathname = pathname
- self.memory_handler = memory_handler
- self.name = memory_handler.get_name()
- self.cacheFilenamePrefix = config.get_cache_folder_name(self.name)
- self.addressCache = {}
- self.sig = None
- self._word_size = memory_handler.get_target_platform().get_word_size()
- self._feedback = searchers.NoFeedback()
- self._get_mapping()
- self._load()
-
- def _get_mapping(self):
- # XXX todo this is getHeap...
- self.mmap = self.memory_handler._get_mapping(self.mmap_pathname)[0]
- return
-
- def _load(self):
- # DO NOT SORT THE LIST. These are sequences, not sets.
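
# [Editor's sketch - not part of the diff] What _load below computes, and how
# getAddressForPreviousPointer inverts it: the signature stores the gaps
# between pointer-looking words, and a prefix sum over those gaps, added to
# the mapping start, recovers an address again.
mapping_start = 0x8000
pointer_addrs = [0x8014, 0x8020, 0x8028, 0x802c]
intervals = []
last = mapping_start
for addr in pointer_addrs:
    intervals.append(addr - last)
    last = addr
assert intervals == [0x14, 0xc, 0x8, 0x4]
# inverse: sum the first N intervals and add the mapping start
assert mapping_start + sum(intervals[:2]) == 0x8020
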
- myname = self.cacheFilenamePrefix + '.pinned'
- log.debug('Reading signature from %s', myname)
- sig = utils.int_array_cache(myname)
- if sig is None:
- log.info(
- "Signature has to be calculated for %s. It's gonna take a while." %
- self.name)
- matcher = matchers.PointerSearcher(self.memory_handler)
- pointerSearcher = searchers.WordAlignedSearcher(self.mmap, matcher, self._feedback, self._word_size)
- #pointerSearcher = matchers.PointerSearcher(self.mmap)
- sig = []
- # save first offset
- last = self.mmap.start
- for i in pointerSearcher: # returns the vaddr
- sig.append(i - last) # save intervals between pointers
- # print hex(i), 'value:', hex(self.mmap.readWord(i) )
- last = i
- # save it
- sig = utils.int_array_save(myname, sig)
- else:
- log.debug("%d Signature intervals loaded from cache." % (len(sig)))
- self.sig = sig
- #
- # previous pointer of interval 0 is start of mmap
- self.addressCache[0] = self.mmap.start
- self._loadAddressCache()
- return
-
- def _loadAddressCache(self):
- # DO NOT SORT THE LIST. These are sequences, not sets.
- myname = self.cacheFilenamePrefix + '.pinned.vaddr'
- if os.access(myname, os.F_OK):
- addressCache = pickle.load(open(myname, 'rb'))
- log.debug(
- "%d Signature addresses loaded from cache." %
- (len(addressCache)))
- self.addressCache.update(addressCache)
- else: # get at least 10 values
- for i in range(0, len(self), len(self) // 10):
- self.getAddressForPreviousPointer(i)
- self._saveAddressCache()
- return
-
- def _saveAddressCache(self):
- myname = self.cacheFilenamePrefix + '.pinned.vaddr'
- pickle.dump(self.addressCache, open(myname, 'wb'))
-
- def getAddressForPreviousPointer(self, offset):
- '''
- Sum all intervals up to, but not including, the offset. That gives us the
- relative offset. Add the mapping start address, and we have the vaddr.
- If we included the offset itself, we would get the second pointer's vaddr.
- '''
- # use the cache
- if offset in self.addressCache:
- return self.addressCache[offset]
- # get the closest one
- keys = sorted(self.addressCache)
- keys = list(itertools.takewhile(lambda x: x < offset, keys))
- last = keys[-1] # take the closest
- startValue = self.addressCache[last] # == addr(last-1)
- # we are not interested in adding the offset interval. that would give us
- # the second pointer address
- subseq = self.sig[last:offset]
- #newsum = startValue + reduce(lambda x,y: x+y, subseq)
- #self.addressCache[offset] = newsum
- # be proactive +/- 40 MB
- newsum = startValue
- for i in range(last, offset):
- newsum += self.sig[i]
- self.addressCache[i + 1] = newsum
- # be proactive
- return newsum
-
- def __len__(self):
- return len(self.sig)
-
- def __str__(self):
- return "<PointerIntervalSignature %s>" % self.name
-
-
-class SequencesMaker:
-
- '''
- Builds a list of sequences of intervals, one for each interval in the signature.
- [2,3,3,4,5,1,2,3,4,5] gives - [(2,3,3), (3,3,4), (3,4,5), (4,5,1), (5,1,2), (1,2,3), (2,3,4), (3,4,5)] - - ''' - - def __init__(self, sequence, size, cacheAll=True): - self.size = size - self.seq = sequence - self.sets = {} # key is sequence len - self.cacheAll = cacheAll - self.findUniqueSequences(self.seq) - - def findUniqueSequences(self, seq): - log.debug('number of intervals: %d' % (len(seq))) - sig_set = set(seq) - log.debug('number of unique intervals value: %d' % (len(sig_set))) - # create the tuple - self.sets[self.size] = set(self.getSeqs()) - log.debug( - 'number of unique sequence len %d : %d' % - (self.size, len( - self.sets[ - self.size]))) - return - - def getSeqs(self): - if not hasattr(self, 'seqs'): - seqlen = self.size - self.seqs = [tuple(self.seq[i:i + seqlen]) - for i in range(0, len(self.seq) - seqlen + 1)] - seqs = self.seqs - return seqs - - def __len__(self): - return len(self.seq) - self.size - - def __iter__(self): - seqlen = self.size - for i in range(0, len(self.seq) - seqlen + 1): - yield tuple(self.seq[i:i + seqlen]) - return - - -class PinnedPointers: - - ''' - A variable length sequence of intervals between pointers. - It already pinned at a specific offset of a signature, - so you might find several instance p1 and p2 at different offset, but with the same sequence - and therefore equal signature. p1 == p2. - It is easily pin onto the initial dump/heap by getAddress() - - @param sequence: the sequence of intervals between pointers - @param sig: the whole signature object linked back to the memoryMap - @param offset: the offset of this interval within the signature - ''' - - def __init__(self, sequence, sig, offset, word_size): - self.sequence = sequence - self.nb_bytes = sum(sequence) + word_size - self.offset = offset - self.sig = sig - self.relations = {} - self.vaddr = None - - def pinned(self, nb=None): - if nb is None: - nb == len(self.sequence) - return self.sequence[:nb] - - def __len__(self): - return len(self.sequence) - - def structLen(self): - return self.nb_bytes - - def __cmp__(self, o): - if len(self) != len(o): - return cmp(len(self), len(o)) - # that means the sequence is different too - if self.structLen() != o.structLen(): - return cmp(self.structLen(), o.structLen()) - if self.sequence != o.sequence: # the structLen can be the same.. - return cmp(self.sequence, o.sequence) - # else offset is totally useless, we have a match - return 0 - - def __contains__(self, other): - raise NotImplementedError - if not isinstance(other, PinnedPointers): - raise ValueError - if other.sig == self.sig: # well, not really - if other.offset >= self.offset and other.offset <= self.offset + \ - len(self): - # if other.sequence in self.sequence: ## need subsearch - return True - return False - - def addRelated(self, other, sig=None): - ''' add a similar PinnedPointer from another offset or another sig ''' - if self != other: - raise ValueError('We are not related PinnedPointers.') - if sig is None: - sig = self.sig - if sig not in self.relations: - self.relations[sig] = list() - self.relations[sig].append(other) - return - - def getAddress(self, numOffset=0): - ''' - return the vaddr of pointer . 
by default numOffset == 0, returns the vaddr of the first interval
- ( that might be the first or second pointer in the struct )
- '''
- if self.vaddr is None:
- if numOffset >= len(self.sequence):
- raise IndexError
- self.vaddr = self.sig.getAddressForPreviousPointer(self.offset)
- if numOffset != 0:
- return self.sig.getAddressForPreviousPointer(
- self.offset + numOffset)
- return self.vaddr
-
- def __str__(self):
- return '<PinnedPointers %s[%d:%d] %d bytes / %d pointers>' % (
- self.sig, self.offset, self.offset + len(self), self.nb_bytes, len(self.sequence) + 1)
-
- @classmethod
- def link(cls, lstOfPinned):
- for i, p1 in enumerate(lstOfPinned):
- for p2 in lstOfPinned[i + 1:]:
- p1.addRelated(p2, p2.sig)
- p2.addRelated(p1, p1.sig)
- return
-
-
-class AnonymousStructRange:
-
- '''
- Maps a pinnedPointer sequence/signature onto a specific memory at a specific offset.
- We are then able to query the structure contents.
-
- Operators:
- __contains__ : if applied to a Number, it is understood as a memory address.
- if the memory address is in range of this structure, return True.
- in all other cases, return False
- __cmp__ : if applied to a Number, it is understood as a memory address.
- if the memory address is in range of this structure, return 0.
- in all other cases, return the __cmp__ of the address compared to the start of the struct
- '''
-
- def __init__(self, pinnedPointer, word_size):
- self.pinnedPointer = pinnedPointer
- # by default we start at the first pointer
- self.start = pinnedPointer.getAddress()
- self.stop = pinnedPointer.getAddress(
- len(pinnedPointer)) # by default we stop at the last pointer
- # add the length of the last pointer
- self.stop += word_size
- self.pointers = None
- self.pointersTypes = {}
- self.pointersValues = None
- self.typename = self.makeTypeName()
-
- def getPointersAddr(self):
- if self.pointers is None:
- self.pointers = [self.pinnedPointer.getAddress(
- i) for i in range(len(self.pinnedPointer) + 1)]
- return self.pointers
-
- def getPointersValues(self):
- if self.pointersValues is None:
- mmap = self.pinnedPointer.sig.mmap
- self.pointersValues = [
- mmap.read_word(addr) for addr in self.getPointersAddr()]
- return self.pointersValues
-
- def setPointerType(self, number, anonStruct):
- ''' set a specific pointer to a specific anonStruct type '''
- if anonStruct.sig() != self.sig():
- raise TypeError(
- 'You cannot type with an AnonStruct from another PointerIntervalSignature. %s vs %s' %
(self, anonStruct))
- if number in self.pointersTypes:
- raise IndexError('%s Pointer number %d has already been identified as a type %s - new type : %s' % (
- self, number, self.getPointerType(number).type(), anonStruct.type()))
- self.pointersTypes[number] = anonStruct
- myself = ''
- if self == anonStruct:
- myself = ' (MYSELF) '
- log.debug(
- 'Set %s pointer number %d to type %s %s' %
- (self.type(),
- number,
- self.getPointerType(number).type(),
- myself))
- return
-
- def getPointerOffset(self, number):
- return self.pinnedPointer.getAddress(number) - self.start
-
- def getPointerType(self, number):
- return self.pointersTypes[number]
-
- def sig(self):
- return self.pinnedPointer.sig
-
- def sequence(self):
- return self.pinnedPointer.sequence
-
- def type(self):
- return self.typename
-
- def __contains__(self, other):
- if isinstance(other, numbers.Number):
- rel = other - self.start
- if rel > len(self) or (rel < 0):
- return False
- return True
- else:
- return False
-
- def __cmp__(self, other):
- if other in self:
- return 0
- else:
- return cmp(self.start, other)
-
- def __len__(self):
- return int(self.stop - self.start)
-
- def makeTypeName(self):
- return 'AnonStruct_%s_%s_%s_%s' % (len(self), len(
- self.pinnedPointer), self.pinnedPointer.sig.name, self.pinnedPointer.offset)
-
- def toCtypesString(self):
- s = ''
- return
-
- def __str__(self):
- return '<%s>' % (self.type())
-
-
-class PinnedPointersMapper:
-
- '''
- a) Identify long sequences of intervals (fixed size of 20).
- b) Find the sequences common to all signatures.
- c) For each offset of each signature, determine a PinnedPointer
- that covers the largest sequence composed of common sequences.
- *** Possible error: the sequence created in sig1 does not exist in sig2.
- This can happen if sig2 contains A4 and A5 in two distinct zones ( A5 == A4[1:]+...
- while sig1 contains A4A5 in one distinct zone;
- we end up with sig A4A5 but sig2.A4 and sig2.A5.
- In that case we can re-split sig1 along the smallest common denominator of sig2.
- -> check routine
- d) Link these PinnedPointers together ( a central repository would be better ).
- e) Meta info: find the multiple instances ( same struct, multiple allocs ).
- '''
-
- def __init__(self, word_size, sequenceLength=20):
- self.cacheValues2 = {}
- self.signatures = []
- self.signatures_sequences = {}
- self.started = False
- self.common = []
- self.length = sequenceLength
- self.word_size = word_size
- return
-
- def addSignature(self, sig):
- if self.started:
- raise ValueError("Mapping has started, you can't add new signatures")
- self.signatures.append(sig)
- return
-
- def _findCommonSequences(self):
- log.info('Looking for common sequences of length %d' % self.length)
- common = None
- # make len(sig) sub sequences of size ( in .sets )
- for sig in self.signatures:
- self.signatures_sequences[sig] = SequencesMaker(
- sig.sig,
- self.length,
- False)
- if common is None:
- common = set(self.signatures_sequences[sig].sets[self.length])
- else:
- common &= self.signatures_sequences[sig].sets[self.length]
- log.info(
- 'Common sequences of length %d: %d seqs' %
- (self.length, len(common)))
- return common
-
- def _mapToSignature(self, sig):
- # LOL. difflib.SequenceMatcher.
-
- # now we have to map the common set onto the original array;
- # a) we can iter(sig) until we find a non-common sequence.
- # b) reduce previous slices to 1 bigger sequence.
- # We can aggregate offsets as long as the sequence [start:start+length] is in common.
- # We end up with a 'small' number of fairly large sequences that are
- # supposed to be common.
- sig_aggregated_seqs = []
- sig_uncommon_slice_offset = []
- start = 0
- stop = 0
- i = 0
- length = self.length
- seqs_sig1 = self.signatures_sequences[sig]
- common = self.common
- # all subsequences, offset by offset
- enum_seqs_sig = enumerate(seqs_sig1)
- try:
- while i < len(seqs_sig1): # we won't have a StopIteration...
- for i, subseq in enum_seqs_sig:
- if subseq in common:
- start = i
- #log.debug('Saving a Uncommon slice %d-%d'%(stop,start))
- sig_uncommon_slice_offset.append((stop, start))
- break
- del subseq
- # enum is on first valid sequence of intervals
- #log.debug('Found next valid sequence at interval offset %d/%d/%d'%(i,len(sig.sig), len(seqs_sig1) ))
- for i, subseq in enum_seqs_sig:
- if subseq in common:
- del subseq
- continue
- # the last interval in the tuple of intervals is
- # not common
- else:
- # so we need to aggregate from [start:stop+length]
- # there CAN be another common slice starting between stop and stop+length.
- # (1,2,3,4) is common , (1,2,3,4,6) is NOT common because of the 1, (2,3,4,6) is common.
- # next valid slice is at start+1
- # so yes, we can have overlapping sequences
- stop = i # end aggregation slice
- seqStop = stop + length - 1
- # we should also pin it in sig2, sig3, and relate to
- # that...
- pp = savePinned(
- self.cacheValues2,
- sig,
- start,
- seqStop - start,
- self.word_size)
- sig_aggregated_seqs.append(pp) # save a big sequence
- #log.debug('Saving an aggregated sequence %d-%d'%(start, stop))
- del subseq
- break # goto search next common
- # find next valid interval
- # wait for end of enum
- except StopIteration as e:
- pass
- # done
- # log.debug('%s'%sig1_uncommon_slice_offset)
- log.info(
- 'There are %d uncommon slice zones in %s' %
- (len(sig_uncommon_slice_offset), sig))
- log.info(
- 'There are %d common aggregated sequences == struct types in %s' %
- (len(sig_aggregated_seqs), sig))
-
- return sig_uncommon_slice_offset, sig_aggregated_seqs
-
- def _findMultipleInstances(self):
- allpp = sorted([v for l in self.cacheValues2.values()
- for v in l], reverse=True)
- unresolved = []
- linkedPP = []
- linked = 0
- multiple = 0
-
- for k, g in itertools.groupby(allpp):
- l = list(g)
- # we can have multiple instances but not less. 
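- # Commented aside (toy values, illustration only): itertools.groupby
- # merges *adjacent* equal items only, which is why allpp is sorted first.
- #
- #   import itertools
- #   data = sorted([3, 1, 3, 2, 3])                      # [1, 2, 3, 3, 3]
- #   [(k, len(list(g))) for k, g in itertools.groupby(data)]
- #   # -> [(1, 1), (2, 1), (3, 3)]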
- if len(l) < len(mapper.signatures):
- unresolved.extend(l)
- # print 'not same numbers'
- continue
- else:
- allSigs = True
- # we should have all 3 signatures
- found = [pp.sig for pp in l]
- for s in mapper.signatures:
- if s not in found:
- unresolved.extend(l)
- # print 'not same sigs', s
- allSigs = False
- break
- # if ok, link them all
- if allSigs:
- PinnedPointers.link(l)
- linkedPP.extend(l)
- multiple += 1
- linked += len(l)
-
- unresolved = sorted(unresolved, reverse=True)
- linkedPP = sorted(linkedPP, reverse=True)
-
- self.unresolved = unresolved
- self.resolved = linkedPP
- log.info(
- 'Linked %d PinnedPointers across all PointerIntervalSignatures, %d unique in all Signatures ' %
- (linked, multiple))
- log.info(
- 'left with %d/%d partially unresolved pp' %
- (len(unresolved), len(allpp)))
- # cache to disk
- # cacheToDisk(self.resolved,'pinned-resolved')
- # cacheToDisk(self.unresolved,'pinned-unresolved')
- return
-
- def run(self):
- self.started = True
- all_common_pp = []
-
- CACHE = 'pinned-resolved'
- CACHE2 = 'pinned-unresolved'
- global mapper
- mapper = self
-
- # drop 1 : find common sequences
- self.common = self._findCommonSequences()
-
- # drop 2: Map sequence to signature, and aggregate overlapping
- # sequences.
- for sig in self.signatures:
- unknown_slices, common_pp = self._mapToSignature(sig)
- all_common_pp.extend(common_pp)
-
- # drop 3: error case, we have been too optimistic about the uniqueness of common sequences.
- # Let's try and reduce the errors.
- # for each structLen, find at least one pp for each sig
-
- # Chances are that only the last interval is botched, so we only have to compare
- # pp1.sequence[:-1] and pp2.sequence[:-1] to find a perfect match.
- # We need to find the sole pointer. Pop all equals in the 3 sigs.
- # drop 3: Analyze and find multiple instances of the same Sequence
- self._findMultipleInstances()
-
- # drop 4: Sequences should have been linked, cross-signature. Try to extend them.
- # We cannot grow the sequences; there is no common pattern left.
- # However, we can try to find shorter sequences inside the
- # uncommon_slices intervals.
- # We can use the pointers found on the stack to find the real
- # start-of-structure. 
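- # Commented aside: the expected call pattern for this pipeline (sig1..sig3
- # are hypothetical PointerIntervalSignature objects, one per memory dump):
- #
- #   mapper = PinnedPointersMapper(word_size=4, sequenceLength=20)
- #   for s in (sig1, sig2, sig3):
- #       mapper.addSignature(s)
- #   mapper.run()   # runs the drops above, then pins resolved allocators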
- caches = self._makeCaches() - pickle.dump( - caches, - open( - '/home/jal/Compil/python-haystack/outputs/caches', - 'wb')) - self._pinResolved(caches) - return - - # 3 STEP 2 , pin them on the wall/heap - - def _makeCaches(self): - caches = {} - for sig in self.signatures[:]: - a = Dummy() - resolved_for_sig = [pp for pp in self.resolved if pp.sig == sig] - unresolved_for_sig = [ - pp for pp in self.unresolved if pp.sig == sig] - log.debug('Pin anonymous allocators on %s' % sig) - pinned = [AnonymousStructRange(pp, self.word_size) for pp in resolved_for_sig] - log.debug('Create list of allocators addresses for %s' % sig) - pinned_start = [pp.getAddress() for pp in resolved_for_sig] - # if sorted(pinned_start) != pinned_start: - # log.error('Damn !') - # raise ValueError('iscrewedupbadlyhere') - log.debug('Pin probable anonymous allocators on %s' % sig) - pinned_lightly = [ - AnonymousStructRange(pp, self.word_size) for pp in unresolved_for_sig] - log.debug( - 'Create list of probable allocators addresses for %s' % - sig) - pinned_lightly_start = [pp.getAddress() - for pp in unresolved_for_sig] - # save it - a.pinned = pinned - a.pinned_start = pinned_start - a.pinned_lightly = pinned_lightly - a.pinned_lightly_start = pinned_lightly_start - caches[sig] = a - return caches - - def _pinResolved(self, caches): - #log.debug('Overlapping sequences can happen. we will filter them later using a tree of allocators.') - # for i, pp in enumerate(pinned): - # if pp.start in pinned[i+1:]: - # pass - - # TODO stack pointers value and compare them to pinned_start, - # pinned_lightly_start - - # In each anon structure Pa, get each pointers value. - # If the value is in the list of allocators head addresses, we have a start of struct (mostly true) - # we check Related Struct in the other signatures to see if everybody agrees. - # the parent in sig A (Pa) should point to children type in sig A (Ca) - # the parent in sig B (Pb) should point to children type in sig B (Cb) - # Pa and Pb are related, Ca and Cb should be related too. - sig = self.signatures[0] - pinned = caches[sig].pinned - pinned_start = caches[sig].pinned_start - pinned_lightly = caches[sig].pinned_lightly - pinned_lightly_start = caches[sig].pinned_lightly_start - # for as in pinned, get pointers values and make a tree - log.debug('Going through pointers') - startsWithPointer = 0 - startsMaybeWithPointer = 0 - pointsToStruct = 0 - pointsToStruct2 = 0 - self.startTree = [] - self.startTree2 = [] - self.tree = [] - self.tree2 = [] - startsWithPointerList = self.startTree - startsMaybeWithPointerList = self.startTree2 - pointsToStructList = self.tree - pointsToStructList2 = self.tree2 - for i, ap in enumerate(pinned): - ptrs = ap.getPointersValues() - crosscheck = False - # ptr is the value of pointer number j in the anonymoustruct ap - for j, ptr in enumerate(ptrs): - p_off = ap.getPointerOffset(j) - if ptr in pinned_start: - log.debug( - '--------------------------------------------------------------------------') - log.debug( - 'Lucky guess s:%d, p:%d, we find a pointer to the start of %d PinnedPointer struct.' % - (i, j, pinned_start.count(ptr))) - startsWithPointerList.append((ap, j)) - # check if the same struct in sig2, sig3... 
points to the - # same target struct - if self._crosscheckChild(caches, ap, j, ptr): - if ap == ap.getPointerType(j): - log.info( - 'ID-ed %s.pointers[%d](0x%x) to type %s (MYSELF)' % - (ap, j, ap.getPointerOffset(j), ap.getPointerType(j))) - else: - log.info( - 'ID-ed %s.pointers[%d](0x%x) to type %s (0x0)' % - (ap, j, ap.getPointerOffset(j), ap.getPointerType(j))) - crosscheck = True - log.debug( - '--------------------------------------------------------------------------') - elif ptr in pinned_lightly_start: - log.debug( - 'Lucky guess s:%d, p:%d we find a pointer to %d maybe-PinnedPointer struct.' % - (i, j, pinned_lightly_start.count(ptr))) - startsMaybeWithPointerList.append((ap, j)) - #log.info('ID-ed %s.pointers[%d] to LIGHTLY'%(ap, j)) - # ptr is in the middle of a anonymous struct - elif ptr in pinned: - pointsToStructList.append((ap, j)) - # check if the same struct in sig2, sig3... points to the - # same target struct - offset = self._crosscheckChildInMiddle(caches, ap, j, ptr) - if offset: - if ap == ap.getPointerType(j): - #p_off = ap.getPointerOffset(j) - # offset - p_off dans la meme structure donne une - # idee de la sequentialite des malloc - log.info( - 'ID-ed %s.pointers[%d](0x%x) to type %s (0x%x) %d' % - (ap, j, p_off, ap.getPointerType(j), offset, offset - p_off)) - prev_p_off = p_off - else: - log.info( - 'ID-ed %s.pointers[%d](0x%x) to type %s (0x%x) ' % - (ap, j, p_off, ap.getPointerType(j), offset)) - elif ptr in pinned_lightly: - pointsToStructList2.append((ap, j)) - #log.info('ID-ed %s.pointers[%d] in LIGHTLY'%(ap, j)) - else: - # the pointer is not in another struct. Find the next - # nearest - first_addr, anonStruct = self._findNearestStruct( - ptr, caches, sig) - # if there is at least one pointer type which crosschecked - if crosscheck: - self._relinkPointers(caches, ap) - # pointer to self means c++ object ? - sig._saveAddressCache() - - log.debug( - 'We have found %d pointers to pinned structs' % - startsWithPointer) - log.debug( - 'We have found %d pointers to pinned maybe-structs' % - startsMaybeWithPointer) - return - - def _findNearestStruct(self, ptr, caches, sig): - pinned = caches[sig].pinned - pinned_start = caches[sig].pinned_start - pinned_lightly = caches[sig].pinned_lightly - pinned_lightly_start = caches[sig].pinned_lightly_start - # - first_addr, anonStruct = self._findFirstStruct( - ptr, pinned_start, pinned) - first_addr_l, anonStruct_l = self._findFirstStruct( - ptr, pinned_lightly_start, pinned_lightly) - if first_addr == first_addr_l and first_addr == -1: - log.warning('No struct after ptr value 0x%x' % ptr) - return -1, None - if first_addr_l < first_addr: # TODO ??? 
- ret = (anonStruct, first_addr) - else: - ret = (anonStruct_l, first_addr_l) - anonStruct = anonStruct_l - if not anonStruct: - return -1, None - offset = anonStruct.start - ptr - if offset < 64: - log.debug( - 'Found a probable start of struct at %d bytes earlier' % - offset) - return ret - - def _findFirstStruct(self, ptr, addresses, anons): - try: - first_addr = itertools.dropwhile( - lambda x: x < ptr, - addresses).next() - anon = anons[addresses.index(first_addr)] # same index - except StopIteration as e: - return -1, None - return first_addr, anon - - def _crosscheckChild(self, cache, astruct, pointerIndex, ptr): - ''' - we found a parent_1 -> child_1 - check for all other parents ( from other signature) , if their n-th pointer is related to child_1 - - @param cache: cache for all calculated lists - @param ap: the AnonymousStructRange sequence - @param pointerIndex: the index number for the ptr - @param ptr: ptr is the value of pointer number pointerIndex - ''' - perfect = [] - parent_pp = astruct.pinnedPointer - child_astruct = cache[ - parent_pp.sig].pinned[ - cache[ - parent_pp.sig].pinned.index(ptr)] - child_pp = child_astruct.pinnedPointer - perfect.append((astruct, child_astruct)) - - related_child_pps = [] - for sig, pps in child_pp.relations.items(): - related_child_pps.extend(pps) - - other_parent_pps = [] - for sig, pps in parent_pp.relations.items(): - other_parent_pps.extend(pps) - # - for other_parent_pp in other_parent_pps: - sig = other_parent_pp.sig - other_parent_astruct = AnonymousStructRange(other_parent_pp, self.word_size) - other_parent_astruct = cache[sig].pinned[ - cache[sig].pinned.index( - other_parent_astruct.start)] # get the real one - ptr_value = other_parent_astruct.getPointersValues()[pointerIndex] - # get the child at @ptr_value - try: - other_child_astruct = cache[sig].pinned[ - cache[sig].pinned.index(ptr_value)] - except ValueError as e: - return False # children is not the same/ not pinned correctly - other_child_pp = other_child_astruct.pinnedPointer - # we now have the child of the other_parent_pp as per its ptr value - if other_child_pp in related_child_pps: - log.debug('Perfect Match - the other parent-child is ok') - perfect.append((other_parent_astruct, other_child_astruct)) - else: - return False - - for parent, child in perfect: - parent.setPointerType(pointerIndex, child) - - return True - - def _crosscheckChildInMiddle(self, cache, astruct, pointerIndex, ptr): - ''' - we found a parent_1 -> child_1 - check for all other parents ( from other signature) , if their n-th pointer is related to child_1 - - @param cache: cache for all calculated lists - @param ap: the AnonymousStructRange sequence - @param pointerIndex: the index number for the ptr - @param ptr: ptr is the value of pointer number pointerIndex - - return the offset of the pointed bytes from the start of the identified struct - ''' - perfect = [] - parent_pp = astruct.pinnedPointer - child_astruct = cache[ - parent_pp.sig].pinned[ - cache[ - parent_pp.sig].pinned.index(ptr)] - child_offset = ptr - child_astruct.start - child_pp = child_astruct.pinnedPointer - perfect.append((astruct, child_astruct)) - - related_child_pps = [] - for sig, pps in child_pp.relations.items(): - related_child_pps.extend(pps) - - other_parent_pps = [] - for sig, pps in parent_pp.relations.items(): - other_parent_pps.extend(pps) - # - for other_parent_pp in other_parent_pps: - sig = other_parent_pp.sig - other_parent_astruct = AnonymousStructRange(other_parent_pp, self.word_size) - other_parent_astruct = 
cache[sig].pinned[ - cache[sig].pinned.index( - other_parent_astruct.start)] # get the real one - ptr_value = other_parent_astruct.getPointersValues()[pointerIndex] - # get the child at @ptr_value - try: - other_child_astruct = cache[sig].pinned[ - cache[sig].pinned.index(ptr_value)] - except ValueError as e: - return False # children is not the same/ not pinned correctly - other_child_pp = other_child_astruct.pinnedPointer - # we now have the child of the other_parent_pp as per its ptr value - if other_child_pp in related_child_pps: - other_child_offset = ptr_value - other_child_astruct.start - if other_child_offset == child_offset: - log.debug( - 'Perfect Middle Match - the other parent-child is ok') - else: - log.info( - 'Middle-maych diff %d %d' % - (child_offset, other_child_offset)) - return False - perfect.append((other_parent_astruct, other_child_astruct)) - else: - return False - - for parent, child in perfect: - parent.setPointerType(pointerIndex, child) - - return child_offset - - def _relinkPointers(self, caches, astruct): - pass - - def _checkRelationsHard(self, cache, ap, pointerIndex, ptr): - ''' - go through all related pinned pointers of the other signatures. - check if the targeted pinnedpointer for the pointer number is the same pinnedPointer - than in the sig1. - if its not, find in the other signatures, what is the target struct. - - @param cache: cache for all calculated lists - @param ap: the PinnedPointer sequence - @param pointerIndex: the index number for the ptr - @param ptr: ptr is the value of pointer number pointerIndex - ''' - pp = ap.pinnedPointer - ok = False - mypinned = cache[pp.sig].pinned - mypinned_start = cache[pp.sig].pinned_start - # reverse found a anonstruct covering this ptr value ( start or middle - # ) - anontargetPP = mypinned[mypinned.index(ptr)] - if ptr not in mypinned_start: - log.warning(' ++++++++++++++ ptr not in mypinned_start') - # reverse found a anonstruct covering this ptr value ( start ONLY ) - #anontargetPP = mypinned[mypinned_start.index(ptr)] - log.debug('anontargetPP is %s' % anontargetPP) - targetPP = anontargetPP.pinnedPointer - perfect = [(ap, anontargetPP)] # get ourselves - - # look in other signatures - for sig in self.signatures: - if sig == pp.sig: - continue - ok = False - - # 1 - take the related PinnedPointer from the next signature to the parent PP of our first signature - # and calculate the value of the n-th pointer in that pp for that - # signature. - relatedPPs = pp.relations[sig] # parent struct - if len(relatedPPs) > 1: - log.debug('We have more than one relatedPP to target') - tgtAnons = [ - AnonymousStructRange(relatedPP, self.word_size) for relatedPP in relatedPPs] - tgtPtrs = [tgtAnon.getPointersValues()[pointerIndex] - for tgtAnon in tgtAnons] - - # 2 - take the related PinnedPointer from the next signature to [the n-th pointer/children PP of our first signature] - # if we find one start address that is equal to the previously calculated pointer value - # that means we find a parent-children match in both parent types - # and children types. 
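- # Commented toy check (invented addresses) of the match tested below:
- #
- #   tgtPtrs = [0x2000, 0x3000]   # n-th pointer value per related parent
- #   addr = 0x2000                # start of a related child candidate
- #   addr in tgtPtrs              # -> True: parent/child match in that sig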
- ok = 0 - relatedTargetPPs = targetPP.relations[sig] # children struct - for relatedTargetPP in relatedTargetPPs: - addr = AnonymousStructRange(relatedTargetPP, self.word_size).start - log.debug('compare %d and %s' % (addr, tgtPtrs)) - if addr in tgtPtrs: - log.debug( - '** found a perfect match between %s and %s' % - (pp.sig, relatedTargetPP.sig)) - ok += 1 - # on type tous les pointers possible, puis on fera des - # stats sur le ap - # TODO border case, multiple struct pointing to the same - # child - _anon_parent = tgtAnons[tgtPtrs.index(addr)] - _parentStart = _anon_parent.start - parent = cache[sig].pinned[ - cache[sig].pinned_start.index(_parentStart)] - child = cache[sig].pinned[ - cache[sig].pinned_start.index(addr)] - perfect.append((parent, child)) - - # not ok, we did not find a related match on first offset of pinneddpointer. - # that means the targeted struct is either: - # a) not starting with a pointer ( source pointer points before the target pinnedpointer) - # which is weird because, if sig1 if ok, sigX should be ok too. - # b) a bad aggregation has taken place in the target signature. target PP is too big - # maybe we can cut it in halves ? - # c) the pointer stills points to nowhere. we can't be sure of - # anything - if ok != len(relatedTargetPPs): - ok2 = False - for tgtPtr in tgtPtrs: - #log.debug('NOT found a match between %s and %s'%(pp.sig, relatedTargetPP.sig)) - sub = cache[sig].pinned - if tgtPtr in sub: - afound = sub[sub.index(tgtPtr)] - found = afound.pinnedPointer - log.info( - 'Found %d content-pointed struct (not start) in %s' % - (sub.count(tgtPtr), sig)) - log.info(' source pp was %s' % pp) - for myrelatedPP in relatedPPs: - log.info( - ' source related pp was %s' % - myrelatedPP) - log.info( - ' -- got a ptr to %s (0x%x)' % - (found, tgtPtr - found.getAddress())) - sameseq = False - # get start == tgtpp.getAddress(n) , and comp - # tgtpp.sequence[n:n+len] - log.info( - ' source target pp was %s (same seq == %s)' % - (targetPP, sameseq)) - for mytargetPPrelated in relatedTargetPPs: - log.info( - " source's target's related pp was %s (0x%x)" % - (mytargetPPrelated, tgtPtr - mytargetPPrelated.getAddress())) - # we now know that type(found) should be == type(targetPP) - # can we recalculate found and targetPP so they will be related ? - # what to do with related pps of targetPP ? they can be multiple instance.... - # even then, there status of related to targetPP must be severed. we have proof - # they are not the precise instance we are looking for. 
- seq1 = targetPP - ok2 = True - break - elif tgtPtr in cache[sig].pinned_lightly: - sub = cache[sig].pinned_lightly - afound = sub[sub.index(tgtPtr)] - found = afound.pinnedPointer - log.info( - 'Found %d pointed struct in LIGHTLY %s' % - (sub.count(tgtPtr), sig)) - log.info(' source pp was %s' % pp) - for myrelatedPP in relatedPPs: - log.info( - ' source related pp was %s' % - myrelatedPP) - log.info(' source target pp was %s' % targetPP) - for mytargetPPrelated in relatedTargetPPs: - log.info( - " source's target's related pp was %s" % - mytargetPPrelated) - log.info(' got %s' % found) - - ok2 = True - break - if not ok2: - log.info( - 'This one does not points anywhere to a common pinnedPointer struct %s' % - sig) - break - - # all sig have been parsed and we found a - # type(parent->children_in_pos_x) identical for all parent - perfectSigs = set([parent.sig() for parent, child in perfect]) - if ok and len(perfectSigs) == len(self.signatures): - # save that as a perfect match - # pp and relatedPP and be Id equals. - # targetPP and all perfect[] can be id equals. - for parent, child in perfect: - _mysig = parent.pinnedPointer.sig - parent.setPointerType(pointerIndex, child) - return True - return False - - -def savePinned(cacheValues, sig, offset, match_len, word_size): - pinned = sig.sig[offset:offset + match_len] - pp = PinnedPointers(pinned, sig, offset, word_size) - s = pp.structLen() - if s not in cacheValues: - cacheValues[s] = list() - cacheValues[s].append(pp) - return pp - - -def search(opts): - # - make(opts) - pass - - -def argparser(): - rootparser = argparse.ArgumentParser( - prog='haystack-pattern', - description='Do a discovery structure pattern search.') - rootparser.add_argument( - '--debug', - action='store_true', - help='Debug mode on.') - #rootparser.add_argument('sigfile', type=argparse.FileType('wb'), action='store', help='The output signature filename.') - rootparser.add_argument( - 'dumpfiles', - type=argparse.FileType('rb'), - action='store', - help='Source memory dump by haystack.', - nargs='*') - #rootparser.add_argument('dumpfile2', type=argparse.FileType('rb'), action='store', help='Source memory dump by haystack.') - #rootparser.add_argument('dumpfile3', type=argparse.FileType('rb'), action='store', help='Source memory dump by haystack.') - rootparser.set_defaults(func=search) - return rootparser - - -def main(argv): - parser = argparser() - opts = parser.parse_args(argv) - - level = logging.INFO - if opts.debug: - level = logging.DEBUG - logging.basicConfig(level=level) - logging.getLogger('haystack').setLevel(logging.INFO) - logging.getLogger('dumper').setLevel(logging.INFO) - logging.getLogger('dumper').setLevel(logging.INFO) - - opts.func(opts) - - -# def tests(): -# ''' -#import pattern -#pattern.main('../outputs/skype.1.a ../outputs/skype.2.a ../outputs/skype.3.a'.split()) -# cacheValues=pattern.cache -#common = pattern.common -#mapper = pattern.mapper -# -#''' -# pass - -if __name__ == '__main__': - main(sys.argv[1:]) diff --git a/haystack/reverse/re_string.py b/haystack/reverse/re_string.py deleted file mode 100644 index 256db0ed..00000000 --- a/haystack/reverse/re_string.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import encodings -import logging -import numbers -import string - -""" -This module holds some basic utils function. -""" - -log = logging.getLogger('re_string') - -# -# TODO: need a rfind-style function (rfind_utf16). 
otherwise O(n2) is laughing on you in struct/fields evaluation. -# TODO: put heuristics of fields determination and allocators algos in subpackages. -# Field and allocators should be POPOs - not controllers. -# -# - - -# nonprintable=[c for c in '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f'] -# control_chars = ''.join(map(unichr, range(0,32) + range(127,160))) - -def is_printable(c): - if isinstance(c, numbers.Number): - c = chr(c) - if c in string.printable and c not in ['\x0b', '\x0c']: - return True - return False - -# Replace with string.printable -#def is_printable(c): -# x = ord(c) -# if 126 < x: -# # if 159 < x: # ascii 8 bits... lets put it aside... -# # return True -# return False -# # else -# if 31 < x: -# return True -# if x == 9 or x == 10 or x == 13: -# return True -# # if x < 32: -# return False - -utf_valid_cc = [b'\x00'] - -_py_encodings = set(encodings.aliases.aliases.values()) -# except IOError: # TODO delete bz2 and gzip -# except TypeError: # TODO delete hex_codec -# except ValueError: # TODO delete uu_encode -_py_encodings.remove('mbcs') -_py_encodings.remove('hex_codec') -_py_encodings.remove('uu_codec') -_py_encodings.remove('bz2_codec') -_py_encodings.remove('zlib_codec') -_py_encodings.remove('base64_codec') -_py_encodings.remove('tactis') -_py_encodings.remove('rot_13') -_py_encodings.remove('quopri_codec') - -# perf test, string.printable is limited to ascii anyway -# ... -# -# TODO you are probably better of accepting only NULL terminated string -# or string terminated by the end of the structure. - -_py_encodings = set(['ascii', - 'latin_1', - 'iso8859_15', - 'utf_8', - 'utf_16le', - 'utf_32le', - ]) - - -def _py3_byte_compat(c): - if isinstance(c, numbers.Number): - assert(0 <= c < 256) - c = chr(c).encode() - return c - -_w = _py3_byte_compat - -# FIXME, is that memoryview in Python3,2.7 ? 
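- # The FIXME above asks whether memoryview covers this; a short commented
- # comparison (standard library only, illustrative):
- #
- #   data = b'haystack'
- #   view = memoryview(data)[2:6]     # zero-copy slice, like Nocopy below
- #   view.tobytes() == b'ysta'        # -> True
- #
- # memoryview exists in both 2.7 and 3.x; whether its comparison semantics
- # fully replace Nocopy's str/slice equality is left open, as the FIXME says.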
-class Nocopy: - - def __init__(self, bytes, start, end): - self.bytes = bytes - if start < 0: - start = len(bytes) + start - if end < 0: - end = len(bytes) + end - # print '%s < %s <= %s'%(start, end, len(bytes)) - assert(end <= len(bytes)) - assert(start < end) - assert(start >= 0) - self.start = start - self.end = end - # print 'got',self.bytes[self.start:self.end] - - def __getitem__(self, i): - if i >= 0: - return self.bytes[self.start + i] - else: - return self.bytes[self.end + i] - - # end defaults to int.max, not -1 - def __getslice__(self, start=0, end=-1, step=1): - if end > self.end - self.start: # len(self) - end = self.end - self.start - if step == 1: - if start >= 0 and end >= 0: - return Nocopy(self.bytes, self.start + start, self.start + end) - elif start < 0 and end < 0: - return Nocopy(self.bytes, self.end + start, self.end + end) - else: # screw you - return self.bytes[start:end:step] - - def __eq__(self, o): - to = type(o) - # print self.bytes[self.start:self.end], '==',o - if issubclass(to, str) and self.bytes == o: - return self.start == 0 and self.end == len(o) - elif issubclass(to, Nocopy): - return self.bytes[self.start:self.end] == o.bytes[o.start:o.end] - # else: - return self.bytes[self.start:self.end] == o - - def __len__(self): - return self.end - self.start - - -def _rfind_utf16(bytesarray, longerThan=7): - """@returns index of start string""" - if len(bytesarray) < 4: - return -1 - i = len(bytesarray) - 2 - # give one shot 'x000' - if _w(bytesarray[i + 1]) == b'\x00' and _w(bytesarray[i]) == b'\x00': - i -= 2 - while i >= 0 and (_w(bytesarray[i + 1]) == b'\x00' and _w(bytesarray[i]) != b'\x00'): - i -= 2 - # fix last row - i += 2 - if i == len(bytesarray): - return -1 - size = len(bytesarray) - i - if size > longerThan: - return i - return -1 - - -def rfind_utf16(bytes, offset, size, aligned, word_size): - """ - @returns index from offset where utf16 was found - If the result must be aligned, - a) it is assumed that the bytes index 0 is aligned. - b) any misaligned result will be front-truncated - - :param bytes: the data buffer - :param offset: the offset in the data buffer - :param size: the size of the scope in the buffer - :param aligned: indicate if the result string must be aligned with word boundaries - :param word_size: the size of a word - :return: - """ - # print offset, offset+size - bytes_nocp = Nocopy(bytes, offset, offset + size) - index = _rfind_utf16(bytes_nocp) - if aligned and index > -1: - # align results - if index % word_size: - index += index % word_size - if index > offset + size - word_size: - return -1 - return index - - -def find_ascii(bytes, offset, size): - '''@returns index from offset where printable ascii was found''' - bytes_nocp = Nocopy(bytes, offset, offset + size) - i = offset - end = offset + size - while i < end and is_printable(bytes[i]): - i += 1 - size = i - offset - if size > 3: - return 0, size - return -1, -1 - - -def try_decode_string(bytesarray, longerThan=3): - ''' try to read string. Null terminated or not - TODO , maybe check for \x00 in index 0 for utf16 and utf32. 
- ''' - if len(bytesarray) <= longerThan: - return False - i = bytesarray.find(b'\x00') - if i == -1: - # find longuest readable - for i, c in enumerate(bytesarray): - if not is_printable(c): - break - if i <= longerThan: - return False - readable = bytesarray[:i + 1] - ustrings = testAllEncodings(bytesarray[:i + 1]) - else: - ustrings = testAllEncodings(bytesarray) - # all cases - ustrings = [(l, enc, ustr) for l, enc, ustr in ustrings if l > longerThan] - if len(ustrings) == 0: - return False - else: # if len(ustrings) > 5 : # probably an ascii string - valid_strings = [] - i = 0 - for size, codec, chars in ustrings: - log.debug('%s %s' % (codec, repr(chars))) - skip = False - first = None - # check not printable chars ( us ascii... ) - for i, c in enumerate(chars): - # last , NULL terminated. Last because testEncodings should cut - # at '\x00' - if c == b'\x00': - break - if not is_printable(c): - skip = True - if i <= longerThan: - break - log.debug( - 'Not a full string, %s/%d is non printable characters "%s..."' % - (repr(c), - i, - chars[ - :25])) - # else: valid string, but shorter, non null terminated - # FIXME this is BUGGY, utf-16 can also considers single - # bytes. - sizemultiplier = len(b'\x20'.encode(codec)) - slen = sizemultiplier * i - log.debug('shorten at %d - %s' % (slen, chars[:i])) - valid_strings.append((slen, codec, chars[:i])) - break - if skip: - continue - # else - if codec in ['utf_16le', 'utf_32le']: - if bytesarray[1] not in utf_valid_cc: - log.debug( - 'That %s value, with cc %s - not valid ' % - (codec, repr( - bytesarray[1]))) - continue - log.debug('valid entry %s' % chars) - valid_strings.append((size, codec, chars)) - if len(valid_strings) > 0: - valid_strings.sort(reverse=True) - return valid_strings[0] - return False - - -def startsWithNulTerminatedString(bytesarray, longerThan=3): - ''' if there is no \x00 termination, its not a string - that means that if we have a bad pointer in the middle of a string, - the first part will not be understood as a string''' - i = bytesarray.find(b'\x00') - if i == -1: - return False - else: - ustrings = testAllEncodings(bytesarray) - ustrings = [(l, enc, ustr) - for l, enc, ustr in ustrings if l > longerThan] - if len(ustrings) == 0: - return False - else: # len(ustrings) > 5 : # probably an ascii string - notPrintableBool = True - ustring = [[]] - i = 0 - for ustring in ustrings: - #ustring = [(l,enc,s) for l,enc,s in ustrings if enc == 'ascii' ] - # test ascii repr - # if len(ustring) != 1: - # asciis = ustrings # only printable chars even in utf - size = ustring[0] - codec = ustring[1] - chars = ustring[2] - log.debug('%s %s' % (codec, repr(chars))) - # check not printable - notPrintable = [] - for i, c in enumerate(chars): - if c not in string.printable: - notPrintable.append((i, c)) - if (len(notPrintable) / float(len(chars))) > 0.5: - log.debug( - 'Not a string, %d/%d non printable characters "%s..."' % - (len(notPrintable), - i, - chars[ - :25])) - continue - else: - return ustring - return False - -# AnonymousStruct_48_182351808_1: - - -def testAllEncodings(bytesarray): - res = [] - for codec in _py_encodings: - length, my_str = testEncoding(bytesarray, codec) - if length != -1: - res.append((length, codec, my_str)) - res.sort(reverse=True) - log.debug('%d valid decodes: \n%s' % (len(res), str(res))) - return res - - -def testUTF8(bytesarray): - return testEncoding(bytesarray, 'UTF-8') - - -def testUTF16(bytesarray): - return testEncoding(bytesarray, 'UTF-16le') - - -def testUTF32(bytesarray): - return 
testEncoding(bytesarray, 'UTF-32le') - - -def testEncoding(bytesarray, encoding): - ''' test for null bytes on even bytes - this works only for western txt in utf-16 - ''' - sizemultiplier = len('\x20'.encode(encoding)) - #log.debug('size: %d encoding: %s'%(sizemultiplier, encoding)) - try: - ustr = bytesarray.decode(encoding) - except UnicodeDecodeError: - log.debug( - 'UnicodeDecodeError: %s did not decode that len: %d' % - (encoding, len(bytesarray))) - # print repr(bytesarray) - return -1, None - except Exception as e: - log.error('Error using encoding %s' % encoding) - raise e - i = ustr.find(b'\x00') - if i == -1: - log.debug('%s was ok - but no NULL' % encoding) - end = len(ustr) - # return -1, None - else: - # include NULL - end = i + 1 - - slen = sizemultiplier * end - log.debug('%s is ok - with len %d' % (encoding, slen)) - return slen, ustr[:end] diff --git a/haystack/reverse/searchers.py b/haystack/reverse/searchers.py deleted file mode 100644 index d074a86c..00000000 --- a/haystack/reverse/searchers.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -import logging - -from haystack.reverse import matchers -from haystack.utils import xrange - -""" -A few class that can be used to search a portion of memory - for specific pattern (null values, pointers) -""" - -log = logging.getLogger('searchers') - - -class AFeedbackGiver(object): - """ - Class to give feedback at every step. - """ - def __init__(self, steps_count): - self.count = steps_count - - def get_steps_count(self): - return self.count - - def feedback(self, step, val): - """ make a feedback""" - # log.info('processing vaddr 0x%x'%(val)) - raise NotImplementedError - - -class NoFeedback(AFeedbackGiver): - def __init__(self): - super(NoFeedback, self).__init__(1) - - def feedback(self, step, val): - """ make a feedback""" - log.info('processing step 0x%x', val) - return - - -class AbstractSearcher(object): - """ - Search for something in memspace. - feedback(step, val) will be called each step - matcher.test_match will test value for each word - """ - - def __init__(self, search_mapping, matcher, feedback): - """ - search in searchMapping for something. - """ - self._search_mapping = search_mapping - self._matcher = matcher - self._feedback = feedback - self._values = set() - # init the steps - self._init_steps( - self._search_mapping.start, - self._search_mapping.end, - self._feedback.get_steps_count()) - self._init() - - def _init(self): - if not isinstance(self._matcher, matchers.AbstractMatcher): - raise TypeError("matcher should be a AbstractMatcher") - - def _init_steps(self, start, end, steps): - """ - calculate the steps at which feedback would be given - """ - if steps < 1: - return [] - self.steps = [ - i for i,o in enumerate(range( - start, - end, - (end - start) // steps))] # py 3 compatible - return - - def _check_steps(self, step): - if len(self.steps) == 0: - return - if step > self.steps[0]: - val = self.steps.pop(0) - self._feedback.feedback(step, val) - return - - def get_search_mapping(self): - return self._search_mapping - - -class WordAlignedSearcher(AbstractSearcher): - """ - Search for something in memspace. 
- feedback(step, val) will be called each step - matcher.test_match will test value for each word - """ - - def __init__(self, search_mapping, matcher, feedback, word_size): - super(WordAlignedSearcher, self).__init__(search_mapping, matcher, feedback) - self._word_size = word_size - - def __iter__(self): - """ Iterate over the mapping to find all valid matches """ - log.debug('iterate %s mapping for matching values', self.get_search_mapping()) - mapping = self.get_search_mapping() - for i, vaddr in enumerate(xrange(mapping.start, mapping.end, self._word_size)): - self._check_steps(i) # be verbose - if self._matcher.test_match(mapping, vaddr): - yield vaddr - return - - def search(self): - """ - Enumerate all values from the self.__iter__ into a array - """ - log.debug('search %s mapping for matching values', self.get_search_mapping()) - self._values = [t for t in self] - return self._values - - -class AllocatedWordAlignedSearcher(WordAlignedSearcher): - """ - Search for something in allocated memspace. - feedback(step, val) will be called each step - matcher.test_match will test value for each word - """ - - def __init__(self, heap_walker, matcher, feedback, word_size): - """ - - :param heap_walker: IHeapWalker - :param matcher: AbstractMatcher - :param feedback: AbstractFeedback - :param word_size: the target platform word_size - """ - # FIXME push get_heap_mapping to IHeapWalker - search_heap = heap_walker._heap_mapping - super(AllocatedWordAlignedSearcher, self).__init__(search_heap, matcher, feedback, word_size) - self._walker = heap_walker - - def __iter__(self): - """ - Iterate over the allocated chunk of this heap mapping to find all valid matches - """ - log.debug('iterate allocated chunks in %s heap mapping for matching values', self.get_search_mapping()) - mapping = self.get_search_mapping() - i = 0 - for vaddr, size in self._walker.get_user_allocations(): - self._check_steps(i) - # check head of chunk - if self._matcher.test_match(mapping, vaddr): - yield vaddr - if size < 2*self._word_size: - continue - # check each offset in that allocated chunk - for vaddr_2 in xrange(vaddr+size, vaddr+size-self._word_size, self._word_size): - i+=1 - self._check_steps(i) - if self._matcher.test_match(mapping, vaddr_2): - yield vaddr_2 - return diff --git a/haystack/reverse/structure.py b/haystack/reverse/structure.py deleted file mode 100644 index 559ac023..00000000 --- a/haystack/reverse/structure.py +++ /dev/null @@ -1,623 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -from __future__ import print_function -import logging -import pickle -import numbers -import weakref -import ctypes -import sys - -import os - -from haystack.reverse import lrucache - - -# -# AnonymousRecord is an instance -# when we start reversing, we create a RecordType with fields. 
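- # A rough commented sketch of that relationship (illustrative only;
- # memory_handler stands for a loaded dump's handler):
- #
- #   rt = RecordType('struct_1000', 8, [])            # a reversed type
- #   rec = AnonymousRecord(memory_handler, 0x1000, 8) # an allocated chunk
- #   rec.set_record_type(rt)   # typing the instance freezes its fields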
-# -# - -log = logging.getLogger('structure') - -DEBUG_ADDRS = [] - - -def make_filename(_context, _record): - sdir = _context.get_folder_cache_structures() - if not os.path.isdir(sdir): - os.mkdir(sdir) - return os.path.sep.join([sdir, str(_record)]) - - -def make_filename_from_addr(_context, address): - return make_filename(_context, 'struct_%x' % address) - - -def cache_load(_context, address): - # FIXME: unused - dumpname = _context.dumpname - if not os.access(dumpname, os.F_OK): - return None - fname = make_filename_from_addr(_context, address) - p = pickle.load(open(fname, 'rb')) - if p is None: - return None - p.set_memory_handler(_context.memory_handler) - return p - - -def remap_load(_context, address, newmappings): - # FIXME: used by obsolete code - dumpname = _context.dumpname - if not os.access(dumpname, os.F_OK): - return None - fname = make_filename_from_addr(_context, address) - p = pickle.load(open(fname, 'rb')) - if p is None: - return None - # YES we do want to over-write _memory_handler and bytes - p.set_memory_handler(_context.memory_handler) - return p - - -def cache_load_all_lazy(_context): - """ - reload all allocated records with a CacheWrapper. - :param _context: - :return: - """ - dumpname = _context.dumpname - addresses = _context.list_allocations_addresses() - for addr in addresses: - try: - yield addr, CacheWrapper(_context, addr) - except ValueError as e: - log.debug('Record 0x%x not found in cache', addr) - ##raise e - # we do not want to return in error. - # try to load as many as possible. - return - - -class CacheWrapper: - """ - this is kind of a weakref proxy, but hashable - """ - # TODO put that refs in the context - refs = lrucache.LRUCache(5000) - # duh, it works ! TODO: .saveme() on cache eviction - # but there is no memory reduction as the GC does not collect that shit. - # i would guess too many fields, map, context... - - def __init__(self, _context, address): - self.address = address - self._fname = make_filename_from_addr(_context, address) - if not os.access(self._fname, os.F_OK): - raise ValueError("%s does not exists" % self._fname) - self._memory_handler = _context.memory_handler - self.obj = None - - def __getattr__(self, *args): - if self.obj is None or self.obj() is None: # - self._load() - return getattr(self.obj(), *args) - - def unload(self): - if self.address in CacheWrapper.refs: - del CacheWrapper.refs[self.address] - self.obj = None - - def _load(self): - if self.obj is not None: # - if self.obj() is not None: # - return self.obj() - try: - p = pickle.load(open(self._fname, 'rb')) - except EOFError as e: - log.error('Could not load %s - removing it %s', self._fname, e) - os.remove(self._fname) - raise e # bad file removed - if not isinstance(p, AnonymousRecord): - raise EOFError("not a AnonymousRecord in cache. 
%s", p.__class__) - if isinstance(p, CacheWrapper): - raise TypeError("Why is a cache wrapper pickled?") - p.set_memory_handler(self._memory_handler) - p._dirty = False - CacheWrapper.refs[self.address] = p - self.obj = weakref.ref(p) - return - - def save(self): - if self.obj() is None: - return - self.obj().save() - - def __setstate__(self, d): - log.error('setstate %s' % d) - raise TypeError - - def __getstate__(self): - log.error('getstate %s' % self.__dict__) - raise TypeError - - def __hash__(self): - return hash(self.address) - - def __lt__(self, other): - return self.address < other.address - - def __len__(self): - if self.obj is None or self.obj() is None: # - self._load() - return len(self.obj()) - - #def __cmp__(self, other): - # return cmp(self.address, other.address) - - def __str__(self): - return 'struct_%x' % self.address - - -class StructureNotResolvedError(Exception): - pass - - -# should not be a new style class -class AnonymousRecord(object): - """ - AnonymousRecord in absolute address space. - Comparison between struct is done is relative address space. - """ - - def __init__(self, memory_handler, _address, size, prefix=None): - """ - Create a record instance representing an allocated chunk to reverse. - :param memory_handler: the memory_handler of the allocated chunk - :param _address: the address of the allocated chunk - :param size: the size of the allocated chunk - :param prefix: the name prefix to identify the allocated chunk - :return: - """ - self._memory_handler = memory_handler - self._target = self._memory_handler.get_target_platform() - self.__address = _address - if size <= 0: - raise ValueError("a record should have a positive size") - self._size = size - self._reverse_level = 0 - self.__record_type = RecordType('struct_%x' % self.__address, self._size, []) - self.reset() # set fields - self.set_name(prefix) - return - - @property - def name(self): - return self._name - - @name.setter - def name(self, name): - """ - Sets a name for this record. - :param name: name root for the record - :return: - """ - print("setter") - if name is None: - self._name = self.__record_type.name - else: - self._name = '%s_%x' % (name, self.__address) - - def set_name(self, name): - # deprecated - if name is None: - self._name = self.__record_type.name - else: - self._name = '%s_%x' % (name, self.__address) - - def get_name(self): - return self._name - - @property - def address(self): - return self.__address - - @property - def record_type(self): - return self.__record_type - - @property # TODO add a cache property ? - def bytes(self): - if self._bytes is None: - m = self._memory_handler.get_mapping_for_address(self.__address) - self._bytes = m.read_bytes(self.__address, self._size) - # TODO re_string.Nocopy - return self._bytes - - def reset(self): - self._resolved = False - self._resolvedPointers = False - self._reverse_level = 0 - self._dirty = True - self._ctype = None - self._bytes = None - self.__final = False - return - - def set_record_type(self, record_type, final_type=False): - """ - Assign a reversed record type to this instance. - That will change the fields types and render this record immutable. - Any change will have to change the type of this record. 
- :param t: - :return: - """ - self.__record_type = record_type - self.__final = final_type - - def get_fields(self): - """ - Return the reversed fields for this record - - :return: list(Field) - """ - # we have to check for RecordField - # return [f for f in self.__record_type.get_fields()] - from haystack.reverse import fieldtypes - _fields = [] - for f in self.__record_type.get_fields(): - if f.is_record(): - _fields.append(fieldtypes.RecordField(self, f.offset, f.name, f.field_type.name, f.get_fields())) - else: - _fields.append(f) - return _fields - - def get_field(self, name): - """ - Return the field named id - :param name: - :return: - """ - for f in self.get_fields(): - if f.name == name: - return f - raise ValueError('No such field named %s', name) - - def saveme(self, _context): - """ - Cache the structure to file if required. - - :return: - """ - if not self._dirty: - return - # double check that the cache folder exists - sdir = _context.get_folder_cache_structures() - # create the cache filename for this structure - fname = make_filename(_context, self) - try: - # FIXME : loops create pickle loops - # print self.__dict__.keys() - log.debug('saving to %s', fname) - pickle.dump(self, open(fname, 'wb')) - except pickle.PickleError as e: - # self.struct must be cleaned. - log.error("Pickling error, file %s removed", fname) - os.remove(fname) - raise e - except TypeError as e: - log.error(e) - # FIXME pickling a cachewrapper ???? - #import code - #code.interact(local=locals()) - except RuntimeError as e: - log.error(e) - print(self.to_string()) - # FIXME: why silent removal igore - except KeyboardInterrupt as e: - # clean it, its stale - os.remove(fname) - log.warning('removing %s' % fname) - ex = sys.exc_info() - raise ex[1](None).with_traceback(ex[2]) - return - - def get_field_at_offset(self, offset): - """ - returns the field at a specific offset in this structure - - :param offset: - :return: - """ - if offset < 0 or offset > len(self): - raise IndexError("Invalid offset") - if self.get_reverse_level() < 10: - raise StructureNotResolvedError("Reverse level %d is too low for record 0x%x", self.get_reverse_level(), self.address) - # find the field - ret = [f for f in self.get_fields() if f.offset == offset] - if len(ret) == 0: - # then check for closest match - ret = sorted([f for f in self.get_fields() if f.offset < offset]) - if len(ret) == 0: - raise ValueError("Offset 0x%x is not in structure?!" % offset) # not possible - # the last field standing is the one ( ordered fields) - ret = ret[-1] - if offset < ret.offset + len(ret): - return ret - # in between fields. Can happens on un-analyzed structure. 
- # or byte field
- raise IndexError('Offset 0x%x is in middle of field at offset 0x%x' % (offset, ret.offset))
- elif len(ret) != 1:
- raise RuntimeError("there shouldn't be multiple fields at the same offset")
- #ret.sort()
- return ret[0]
-
- def set_memory_handler(self, memory_handler):
- self._memory_handler = memory_handler
- self._target = self._memory_handler.get_target_platform()
-
- def get_reverse_level(self):
- return self._reverse_level
-
- def set_reverse_level(self, level):
- self._reverse_level = level
-
- def to_string(self):
- # print self.fields
- self.get_fields().sort()
- field_string_lines = []
- for field in self.get_fields():
- field_value = self.get_value_for_field(field)
- field_string_lines.append('\t'+field.to_string(field_value))
- fieldsString = '[ \n%s ]' % (''.join(field_string_lines))
- info = 'rlevel:%d SIG:%s size:%d' % (self.get_reverse_level(), self.get_signature_text(), len(self))
- final_ctypes = 'ctypes.Structure'
- # no renaming in instances..
- # if self.__final:
- # final_ctypes = self.__record_type.name
- # ctypes_def = '''
- #%s = %s # %s
- #
- #''' % (self.get_name(), final_ctypes, info)
- # else:
- ctypes_def = '''
-class %s(%s): # %s
- _fields_ = %s
-
-''' % (self.get_name(), final_ctypes, info, fieldsString)
- return ctypes_def
-
- def __contains__(self, other):
- """
- Returns true if other is an address included in the record's address space.
-
- :param other: a memory address
- :return:
- """
- if isinstance(other, numbers.Number):
- # test vaddr in struct instance len
- if self.__address <= other <= self.__address + len(self):
- return True
- return False
- else:
- raise NotImplementedError(type(other))
-
- def __getitem__(self, i):
- """
- Return the i-th field of the structure.
-
- :param i:
- :return:
- """
- return self.get_fields()[i]
-
- def __len__(self):
- """
- Return the size of the record allocated space.
- :return:
- """
- return int(self._size)
-
- def __cmp__(self, other):
- if not isinstance(other, AnonymousRecord):
- return -1
- return cmp(self.__address, other.__address)
-
- def __getstate__(self):
- """ the important fields are
- _resolvedPointers
- _dirty
- _vaddr
- _name
- _resolved
- _ctype
- _size
- _fields
- """
- d = self.__dict__.copy()
- try:
- d['dumpname'] = os.path.normpath(self._memory_handler.get_name())
- except AttributeError as e:
- #log.error('no _memory_handler name in %s \n attribute error for %s %x \n %s'%(d, self.__class__, self.vaddr, e))
- d['dumpname'] = None
- d['_memory_handler'] = None
- d['_bytes'] = None
- d['_target'] = None
- return d
-
- def __setstate__(self, d):
- self.__dict__ = d
- if '_name' not in d:
- self.set_name(None)
- return
-
- def __str__(self):
- # FIXME, that should probably return self._name
- # BUT we need to ensure it does not impact the cache name
- return 'struct_%x' % self.__address
-
- ### pieces of code that need review. 
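- # Commented example of the signature helpers below (made-up field layout,
- # type letters assumed): a record that reverses to a 4-byte pointer
- # followed by a 4-byte int would render roughly as:
- #
- #   rec.get_signature_text()   # -> e.g. 'P4i4' (type letter + field size)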
- - def get_signature_text(self): - return ''.join(['%s%d' % (f.get_signature()[0].signature, f.get_signature()[1]) for f in self.get_fields()]) - - def get_signature(self): - return [f.get_signature() for f in self.get_fields()] - - def get_type_signature_text(self): - return ''.join([f.get_signature()[0].signature.upper() for f in self.get_fields()]) - - def get_type_signature(self): - return [f.get_signature()[0] for f in self.get_fields()] - - def get_value_for_field(self, _field, max_len=120): - my_bytes = self._get_value_for_field(_field, max_len) - if isinstance(my_bytes, str): - bl = len(str(my_bytes)) - if bl >= max_len: - my_bytes = my_bytes[:max_len // 2] + '...' + \ - my_bytes[-(max_len // 2):] # idlike to see the end - return my_bytes - - def _get_value_for_field(self, _field, max_len=120): - from haystack.reverse import fieldtypes - word_size = self._target.get_word_size() - if len(_field) == 0: - return '<-haystack no pattern found->' - if _field.is_string(): - if _field.field_type == fieldtypes.STRING16: - try: - my_bytes = "%s" % (repr(self.bytes[_field.offset:_field.offset + _field.size].decode('utf-16'))) - except UnicodeDecodeError as e: - log.error('ERROR ON : %s', repr(self.bytes[_field.offset:_field.offset + _field.size])) - my_bytes = self.bytes[_field.offset:_field.offset + _field.size] - else: - my_bytes = "'%s'" % (self.bytes[_field.offset:_field.offset + _field.size]) - elif _field.is_integer(): - # what about endianness ? - endianess = '<' # FIXME dsa self.endianess - data = self.bytes[_field.offset:_field.offset + word_size] - val = self._target.get_target_ctypes_utils().unpackWord(data, endianess) - return val - elif _field.is_zeroes(): - my_bytes = repr('\\x00'*len(_field)) - elif _field.is_array(): - my_bytes = self.bytes[_field.offset:_field.offset + len(_field)] - elif _field.padding or _field.field_type == fieldtypes.UNKNOWN: - my_bytes = self.bytes[_field.offset:_field.offset + len(_field)] - elif _field.is_pointer(): - data = self.bytes[_field.offset:_field.offset + word_size] - if len(data) != word_size: - print(repr(data), len(data)) - import pdb - pdb.set_trace() - val = self._target.get_target_ctypes_utils().unpackWord(data) - return val - else: # bytearray, pointer... - my_bytes = self.bytes[_field.offset:_field.offset + len(_field)] - return my_bytes - - -class RecordType(object): - """ - The type of a record. - - """ - def __init__(self, name, size, fields): - self.name = name - self.__size = int(size) - self.__fields = fields - self.__fields.sort() - - def get_fields(self): - return [x for x in self.__fields] - - def __len__(self): - return int(self.__size) - - def to_string(self): - # print self.fields - self.__fields.sort() - field_string_lines = [] - for field in self.__fields: - field_string_lines.append('\t'+field.to_string(None)) - fields_string = '[ \n%s ]' % (''.join(field_string_lines)) - info = 'size:%d' % len(self) - ctypes_def = ''' -class %s(ctypes.Structure): # %s - _fields_ = %s - -''' % (self.name, info, fields_string) - return ctypes_def - - -class ReversedType(ctypes.Structure): - """ - A reversed record type. - - TODO: explain the usage. 
- """ - - @classmethod - def create(cls, _context, name): - ctypes_type = _context.get_reversed_type(name) - if ctypes_type is None: # make type an register it - ctypes_type = type(name, (cls,), {'_instances': dict()}) # leave _fields_ out - _context.add_reversed_type(name, ctypes_type) - return ctypes_type - - @classmethod - def addInstance(cls, anonymousStruct): - """ - add the instance to be a instance of this type - - :param anonymousStruct: - :return: - """ - vaddr = anonymousStruct._vaddr - cls._instances[vaddr] = anonymousStruct - - #@classmethod - # def setFields(cls, fields): - # cls._fields_ = fields - - @classmethod - def getInstances(cls): - return cls._instances - - @classmethod - def makeFields(cls, _context): - # print '****************** makeFields(%s, context)'%(cls.__name__) - root = cls.getInstances().values()[0] - # try: - for f in root.get_fields(): - print(f, f.get_ctype()) - cls._fields_ = [(f.get_name(), f.get_ctype()) for f in root.get_fields()] - # except AttributeError,e: - # for f in root.getFields(): - # print 'error', f.get_name(), f.getCtype() - - #@classmethod - def to_string(self): - fieldsStrings = [] - for attrname, attrtyp in self.get_fields(): # model - # FIXME need ctypesutils. - if self.ctypes.is_pointer_type(attrtyp) and not self.ctypes.is_pointer_to_void_type(attrtyp): - fieldsStrings.append('(%s, ctypes.POINTER(%s) ),\n' % (attrname, attrtyp._type_.__name__)) - else: # pointers not in the heap. - fieldsStrings.append('(%s, %s ),\n' % (attrname, attrtyp.__name__)) - fieldsString = '[ \n%s ]' % (''.join(fieldsStrings)) - - info = 'size:%d' % (self.ctypes.sizeof(self)) - ctypes_def = ''' -class %s(ctypes.Structure): # %s - _fields_ = %s - -''' % (self.__name__, info, fieldsString) - return ctypes_def - diff --git a/haystack/reverse/tmp/ctypes_libdl.py b/haystack/reverse/tmp/ctypes_libdl.py deleted file mode 100644 index 6ff6ccda..00000000 --- a/haystack/reverse/tmp/ctypes_libdl.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -from __future__ import print_function - -import ctypes -import logging -import sys - -''' insure ctypes basic types are subverted ''' -from haystack import model - -__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com" - -log = logging.getLogger('ctypes_libdl') - - -# ============== Internal type defs ============== -class CPP(LoadableMembers): - - ''' defines classRef ''' - pass - - -class A(CPP): - _fields_ = [ - ('a', ctypes.c_uint) - ] - -model.registerModule(sys.modules[__name__]) -############# Start expectedValues and methods overrides ################# - -# test - -import sys -import inspect -src = sys.modules[__name__] - - -def printSizeof(mini=-1): - for (name, klass) in inspect.getmembers( - sys.modules[__name__], inspect.isclass): - # and klass.__module__.endswith('%s_generated'%(__name__) ) : - if isinstance(klass, type(ctypes.Structure)): - if ctypes.sizeof(klass) > mini: - print('%s:' % name, ctypes.sizeof(klass)) - -import time -import subprocess -import os -from subprocess import PIPE -import sys -from subprocess import PIPE, Popen -from threading import Thread - -try: - from Queue import Queue, Empty -except ImportError as e: - from queue import Queue, Empty # python 3.x - -ON_POSIX = 'posix' in sys.builtin_module_names - - -def enqueue_output(out, queue): - for line in iter(out.readline, ''): - queue.put(line) - out.close() - - -def getOutput(p): - q = Queue() - t = Thread(target=enqueue_output, 
args=(p.stdout, q)) - t.daemon = True # thread dies with the program - t.start() - return q, t - - -def readlines(q): - lines = [] - notEmpty = True - while notEmpty: - try: - line = q.get_nowait() or q.get(timeout=1) - except Empty: - notEmpty = False - else: - lines.append(line) - return lines - - -def dumpMemory(pid, fname): - def dumpit(pid, fname): - from haystack import memory_dumper - memory_dumper.dumpToFile(pid, fname) - t = Thread(target=dumpit, args=(pid, fname)) - t.daemon = True # thread dies with the program - t.start() - t.join() - return fname - - -def makeDumps(): - dumps = [] - - cmd = ['./src/test-ctypes2'] - p = subprocess.Popen(cmd, bufsize=1, stdin=PIPE, stdout=PIPE) - q, t = getOutput(p) - - print('\n -- * init data 4 child pid:', p.pid) - out = ''.join(readlines(q)) - while 'START' not in out: - time.sleep(.1) - out = ''.join(readlines(q)) - fname = dumpMemory(p.pid, 'test-ctypes2.dump.0') - print('[+] dumped clean state in', fname) - dumps.append(open(fname, 'rb')) - - stopMe = False - i = 1 - while not stopMe: - print('[-] sending enter') - p.stdin.write('\n') - out = ''.join(readlines(q)) - while 'OPEN' not in out: - if 'END' in out: - print('[+] this is the END... the only END , my friend...') - stopMe = True - break - time.sleep(.1) - out = ''.join(readlines(q)) - if not stopMe: - fname = dumpMemory(p.pid, 'test-ctypes2.dump.%d' % i) - print('[+] dumped', out.split(' ')[1].strip(), 'in', fname) - dumps.append(open(fname, 'rb')) - i += 1 - - return dumps - -from hashlib import md5 - - -def buildMappingsHashes(maps): - return [(md5.md5(m.mmap().get_byte_buffer()).hexdigest(), m.mmap()) - for m in maps] - - -def getDiff(d1, d2): - from haystack import dump_loader - mappings1 = dump_loader.load(d1) - mappings2 = dump_loader.load(d2) - log.debug('Building hashes for %s' % d1.name) - m1 = dict(buildMappingsHashes(mappings1)) - log.debug('Building hashes for %s' % d2.name) - m2 = dict(buildMappingsHashes(mappings2)) - - # new _memory_handler in d2 - pnames1 = set([m.pathname for m in m1.values()]) - pnames2 = set([m.pathname for m in m2.values()]) - new2 = pnames2 - pnames1 - news = [] - for pathname in new2: - news.extend(mappings2._get_mapping(pathname)) - print('new _memory_handler in %s:' % d2.name) - for n in news: - print(n) - # believe in hash funcs. 
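- # Toy illustration of the digest diffing below (made-up hashes):
- #
- #   m1 = {'aa11': 'mapping A', 'bb22': 'mapping B'}
- #   m2 = {'aa11': 'mapping A', 'cc33': 'mapping B (modified)'}
- #   set(m2.keys()) - set(m1.keys())   # -> {'cc33'}: changed or new content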
- diff2 = set(m2.keys()) - set(m1.keys()) - diffs = [] - revm1 = dict((v, k) for k, v in m1.items()) - print('modified _memory_handler in %s:' % d2.name) - for h2 in diff2: - m = m2[h2] - if m.pathname in pnames1: - print(m) - diffs.append(m) - return news, diffs - - -def main(): - logging.basicConfig(level=logging.DEBUG) - - logging.getLogger('dumper').setLevel(logging.ERROR) - logging.getLogger('loader').setLevel(logging.DEBUG) - logging.getLogger('ctypes_libdl').setLevel(logging.DEBUG) - - #dumps = makeDumps() - dumps = [open('test-ctypes2.dump.%d' % i, 'rb') for i in range(4)] - - #n1, diff1 = getDiff(dumps[0], dumps[1]) - #import code - # code.interact(local=locals()) - - n2, diff2 = getDiff(dumps[1], dumps[2]) - import code - code.interact(local=locals()) - - n3, diff3 = getDiff(dumps[2], dumps[3]) - import code - code.interact(local=locals()) - -if __name__ == '__main__': - main() # printSizeof() diff --git a/haystack/reverse/tmp/ctypes_skype.py b/haystack/reverse/tmp/ctypes_skype.py deleted file mode 100644 index 29de2ecb..00000000 --- a/haystack/reverse/tmp/ctypes_skype.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com" - -import ctypes -import logging -import sys - -from haystack import model -from haystack import constraints - -log = logging.getLogger('ctypes_skype') - - -class SkypeStruct(LoadableMembers): - - ''' defines classRef ''' - pass - - -class Imp1(SkypeStruct): # resolved:True SIG:i4P4u8P4 pointerResolved:False - _fields_ = [ - ('small_int_0', ctypes.c_uint), # 1609 - ('ptr_4', ctypes.c_void_p), # @ ad66b88 [heap] - # else bytes:'\x00\xff\xff\xff\x01\x00\x00\x00' - ('untyped_8', ctypes.c_ubyte * 8), - ('ptr_16', ctypes.c_void_p), # @ ad67608 [heap] - ] - expectedValues = { - 'small_int_0': constraints.RangeValue(0, 36000), - 'ptr_4': constraints.NotNull, - 'untyped_8': constraints.PerfectMatch('\x00\xff\xff\xff\x01\x00\x00\x00'), - 'ptr_16': constraints.NotNull - } - - -class Imp2(SkypeStruct): # resolved:True SIG:i4P4u8P4 pointerResolved:False - _fields_ = [ - # else bytes:'\x00\xff\xff\xff\x01\x00\x00\x00' - ('check', ctypes.c_ubyte * 8), - ] - expectedValues = { - 'check': constraints.PerfectMatch('\x00\xff\xff\xff\x01\x00\x00\x00'), - } -# 172622824 -# 0xa4a03e8 - -model.registerModule(sys.modules[__name__]) diff --git a/haystack/reverse/utils.py b/haystack/reverse/utils.py deleted file mode 100644 index 25fb3298..00000000 --- a/haystack/reverse/utils.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function -from past.builtins import long - -""" -This module holds some basic utils function. 
-""" - -import itertools -import logging -import numpy -import os -import struct -import sys - -from haystack.reverse import config -import haystack.reverse.enumerators -import haystack.reverse.matchers - -log = logging.getLogger('utils') - - -def int_array_cache(filename): - if os.access(filename, os.F_OK): - # f = open(filename, 'r') - return numpy.load(filename) - # print 'int_array_cache' - return None - - -def int_array_save(filename, lst): - my_array = numpy.asarray(lst) - # numpy.save(open(filename, 'w'), my_array) - numpy.save(filename, my_array) - return my_array - - -def closestFloorValueNumpy(val, lst): - ''' return the closest previous value to where val should be in lst (or val) - please use numpy.array for lst - PERF ANOUNCEMENT - AFTER TESTING - you are better using numpy.array, 15x for [] for type(lst) than array.array (x22) - array.array is bad algo perf.... - ''' - # Find indices where elements should be inserted to maintain order. - if isinstance(lst, list): # TODO delete - log.warning('misuse of closestFloorValue') - try: - # be positive, its a small hit compared to searchsorted on - # non-numpy array - return lst.index(val) - except ValueError as e: - pass - return closestFloorValueOld(val, lst) - indicetab = numpy.searchsorted(lst, [val]) - ind = int(indicetab[0]) - if ind < len(lst): - if long(lst[ind]) == val: - return long(lst[ind]), ind - if ind == 0: - raise ValueError('Value %0x is under minimum' % val) - i = ind - 1 - return long(lst[i]), i - - -def closestFloorValueOld(val, lst): - ''' return the closest previous value to val in lst. O(4) than numpy with numpy.array ''' - if val in lst: - return val, lst.index(val) - prev = lst[0] - for i in xrange(1, len(lst) - 1): - if lst[i] > val: - return prev, i - 1 - prev = lst[i] - return lst[-1], len(lst) - 1 - -closestFloorValue = closestFloorValueNumpy - - -def dequeue(addrs, start, end): - ''' - dequeue address and return vaddr in interval ( Config.WORDSIZE ) from a list of vaddr - dequeue addrs from 0 to start. - dequeue all value between start and end in retval2 - return remaining after end, retval2 - ''' - ret = [] - while len(addrs) > 0 and addrs[0] < start: - addrs.pop(0) - # FIXME Config.WORDSIZE - WORDSIZE = 4 - while len(addrs) > 0 and addrs[0] >= start and addrs[0] <= end - WORDSIZE: - ret.append(addrs.pop(0)) - return addrs, ret - - -def get_cache_heap_pointers(ctx, enumerator): - """ - Cache or return Heap pointers values in enumerator . - :param dumpfilename: - :param heap_addr: the heap address for the cache filename - :return: - """ - heap_addrs_fname = ctx.get_filename_cache_pointers_addresses() - heap_values_fname = ctx.get_filename_cache_pointers_values() - heap_addrs = int_array_cache(heap_addrs_fname) - heap_values = int_array_cache(heap_values_fname) - if heap_addrs is None or heap_values is None: - log.info('[+] Making new cache - heap pointers') - heap_enum = enumerator.search() - if len(heap_enum) > 0: - heap_addrs, heap_values = zip(*heap_enum) # WTF - else: - heap_addrs, heap_values = (), () - log.info('\t[-] got %d pointers ' % (len(heap_enum))) - # merge - int_array_save(heap_addrs_fname, heap_addrs) - int_array_save(heap_values_fname, heap_values) - else: - log.debug('[+] Loading from cache %d pointers %d unique', len(heap_values), len(set(heap_values))) - return heap_addrs, heap_values - - -def cache_get_user_allocations(ctx, heap_walker): - """ - cache the user allocations, which are the allocated chunks - records addrs and sizes. 
- - :param dumpfilename: - :param memory_handler: - :param heapwalker: - :return: - """ - f_addrs = ctx.get_filename_cache_allocations_addresses() - f_sizes = ctx.get_filename_cache_allocations_sizes() - log.debug('reading from %s' % f_addrs) - addrs = int_array_cache(f_addrs) - sizes = int_array_cache(f_sizes) - if addrs is None or sizes is None: - log.debug('[+] Making new cache - getting allocated chunks from heap ') - # TODO : HeapWalker + order addresses ASC ... - # allocations = sorted(heapwalker.get_user_allocations(_memory_handler, heap)) - # TODO 2 , allocations should be triaged by mmapping ( heap.start ) before write2disk. - # Or the heap.start should be removed from the cache name.. it has no impact. - # heapwalker.cache_get_user_allocations should parse ALL mmappings to get all user allocations. - # But in that case, there will/could be a problem when using utils.closestFloorValue... - # in case of a pointer ( bad allocation ) out of a mmapping space. - # But that is not possible, because we are reporting factual reference to existing address space. - # OK. heap.start should be deleted from the cache name. - allocations = sorted(heap_walker.get_user_allocations()) - if len(allocations) == 0: - return [],[] - addrs, sizes = zip(*allocations) - addrs = int_array_save(f_addrs, addrs) - sizes = int_array_save(f_sizes, sizes) - else: - log.debug('[+] Loading from cache') - log.debug('\t[-] we have %d allocated chunks', len(addrs)) - return addrs, sizes - - -''' - a shareBytes array of bytes. no allocation buffer should be made, only indexes. -''' - - -class SharedBytes: - - def __init__(self, src): - self.src = src - self.start = 0 - self.end = len(src) - return - - def __makeMe(self, start, end): - if end < 0: - raise ValueError - if start < 0: - raise ValueError - sb = SharedBytes(self.src) - sb.start = start - sb.end = end - return sb - - def unpack(self, typ, bytes): - return struct.unpack(typ, str(bytes)) - - def pack(self, typ, *val): - return struct.pack(typ, *val) - - def __getslice__(self, start, end): - if start < 0: # reverse - start = self.end + start - elif start == sys.maxsize: - start = self.start - if end < 0: # reverse - end = self.end + end - elif end == sys.maxsize: - end = self.end - return self.__makeMe(start, end) - - def __len__(self): - return self.end - self.start - - def __getitem__(self, i): - if isinstance(i, slice): - return self.__getslice__(i) - if i < 0: # reverse - i = self.end + i - return self.src[self.start + i] - - def __getattribute__(self, *args): - log.debug('__getattribute__ %d %s' % (id(self), args)) - if len(args) == 1 and args[0] == 'src': - return getattr(self, 'src') - return self.src[self.start:self.end] # .__getattribute__(*args) - - def __getattr__(self, *args): - log.debug('__getattr__ %d %s' % (id(self), args)) - return getattr(self.src[self.start:self.end], *args) - - def __setstate__(self, d): - self.__dict__ = d.copy() - - def __getstate__(self): - return self.__dict__.copy() - - def __str__(self): - return self.src[self.start:self.end] - - def __repr__(self): - return repr(self.src[self.start:self.end]) - - def __iter__(self): - return iter(self.src[self.start:self.end]) - - -def nextStructure(context, struct): - ind = numpy.where(context._pointers_values == struct.vaddr)[0][0] - val = context._structures_addresses[ind + 1] - if val not in context.structures: - return None - if struct.vaddr + len(struct) != val: - print('*** WARNING nextStruct is not concurrent to struct') - return context.get_record_at_address[val] - - -def 
printNext(ctx, s): - s2 = nextStructure(ctx, s) - s2.decodeFields() - print(s2.toString()) - return s2 - - -def flatten(listOfLists): - return itertools.chain.from_iterable(listOfLists) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..e95ff6dc --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +[bdist_wheel] +# This flag says that the code is written to work on both Python 2 and Python 3. +universal=1 \ No newline at end of file diff --git a/setup.py b/setup.py index deedf27c..1d9f4ae6 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ def run(self): setup(name="haystack", - version="0.36", + version="0.40", description="Search C Structures in a process' memory", long_description=open("README.md").read(), url="http://packages.python.org/haystack/", @@ -66,23 +66,19 @@ def run(self): "Development Status :: 4 - Beta", # "Development Status :: 5 - Production/Stable", ], - keywords=["memory", "analysis", "forensics", "record", "struct", "ptrace", "reverse", "heap", "lfh", "lal"], + keywords=["memory", "analysis", "forensics", "record", "struct", "ptrace", "heap", "lfh", "lal"], author="Loic Jaquemet", author_email="loic.jaquemet+python@gmail.com", packages=["haystack", "haystack.abc", - "haystack.gui", "haystack.mappings", "haystack.outputters", - "haystack.reverse", - "haystack.reverse.heuristics", "haystack.search", "haystack.allocators", "haystack.allocators.libc", "haystack.allocators.win32"], # package_dir={"haystack.reverse": 'haystack/reverse'}, - package_data={"haystack.reverse.heuristics": ['data/words.100'], - "haystack.allocators.win32": ['win7heap32.constraints', + package_data={ "haystack.allocators.win32": ['win7heap32.constraints', 'win7heap64.constraints', 'winxpheap32.constraints', 'winxpheap64.constraints'], @@ -104,31 +100,12 @@ def run(self): 'haystack-volatility-dump = haystack.cli:volatility_dump', 'haystack-minidump-search = haystack.cli:minidump_search', 'haystack-minidump-show = haystack.cli:minidump_show', - 'haystack-reverse = haystack.reverse.cli:main_reverse', - 'haystack-minidump-reverse = haystack.reverse.cli:minidump_reverse', - 'haystack-reverse-show = haystack.reverse.cli:main_reverse_show', - 'haystack-reverse-parents = haystack.reverse.cli:main_reverse_parents', - 'haystack-reverse-hex = haystack.reverse.cli:main_reverse_hex', - 'haystack-minidump-reverse-show = haystack.reverse.cli:minidump_reverse_show', - 'haystack-minidump-reverse-parents = haystack.reverse.cli:minidump_reverse_parents', - 'haystack-minidump-reverse-hex = haystack.reverse.cli:minidump_reverse_hex', ] }, - scripts=[ - # "scripts/haystack-gui", - ], - # reverse: numpy is a dependency for reverse. - # https://github.com/numpy/numpy/issues/2434 - # numpy is already installed in travis-ci - ## setup_requires=["numpy"], # search: install requires only pefile, python-ptrace for memory-dump # reverse: install requires networkx, numpy, Levenshtein for signatures install_requires=["pefile>=1.2.10_139", "construct", - # reverse need these. 
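One thing worth flagging in the new install_requires expression just below: "base + a if cond1 else [] + b if cond2 else []" parses as "(base + a) if cond1 else (([] + b) if cond2 else [])", so whenever cond1 is false the base dependencies (pefile, construct) are silently dropped. Worse, "win" in sys.platform is also true on macOS, where sys.platform is "darwin". A safer spelling with the same apparent intent (a hypothetical rewrite, not part of this commit):

import sys

requires = ["pefile>=1.2.10_139", "construct"]
if sys.platform.startswith("win"):
    requires.append("winappdbg")              # Windows debugger backend
else:
    requires.append("python-ptrace>=0.8.1")   # ptrace backend elsewhere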
- # "numpy", - # "networkx", - # "python-Levenshtein" ] + ["python-ptrace>=0.8.1"] if "win" not in sys.platform else [] + ["winappdbg"] if "win" in sys.platform else [], dependency_links=[ diff --git a/test/haystack/reverse/__init__.py b/test/haystack/reverse/__init__.py deleted file mode 100644 index 5b90dbb8..00000000 --- a/test/haystack/reverse/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Unit test module.""" - -import unittest - -if __name__ == '__main__': - unittest.main(verbosity=0) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/haystack/reverse/heuristics/__init__.py b/test/haystack/reverse/heuristics/__init__.py deleted file mode 100644 index 4a6f3e87..00000000 --- a/test/haystack/reverse/heuristics/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Unit test module.""" - -import unittest - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__status__ = "Production" - -if __name__ == '__main__': - unittest.main(verbosity=0) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/haystack/reverse/heuristics/test_dsa.py b/test/haystack/reverse/heuristics/test_dsa.py deleted file mode 100644 index a69a4021..00000000 --- a/test/haystack/reverse/heuristics/test_dsa.py +++ /dev/null @@ -1,432 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Tests for haystack.reverse.structure.""" - -from __future__ import print_function -import unittest -import logging - -from haystack import target -from haystack import dump_loader -from haystack.abc import interfaces - -from haystack.reverse import fieldtypes -from haystack.reverse import context -from haystack.reverse import structure -from haystack.reverse.heuristics import dsa - -from test.testfiles import putty_7124_win7 -from test.testfiles import zeus_856_svchost_exe - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__status__ = "Production" - - -log = logging.getLogger('test_field_analyser') - - -class FS: - def __init__(self, bytes, vaddr=0): - self._bytes = bytes - self._vaddr = vaddr - - def __len__(self): - return len(self._bytes) - - def reset(self): - self._fields = [] - self._resolved = False - self._resolvedPointers = False - self._dirty = True - self._ctype = None - return - - @property - def bytes(self): - return self._bytes - - -class FakeMemoryHandler(interfaces.IMemoryHandler): - """Fake memoryhandler for the tests.""" - - def __init__(self, target): - self.target = target - - def get_name(self): - return "test" - - def get_target_platform(self): - return self.target - - def reset_mappings(self): - return - - -class TestFieldAnalyser(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.test1 = FS( - b'''\x00\x00\x00\x00....\x00\x00\x00\x00\x00\x00\x00\x00....\x00...\x00\x00\x00.\x00\x00\x00\x00....''') - cls.test2 = FS( - b'''....\x00\x00\x00\x00....\x00\x00\x00\x00\x00\x00\x00\x00....\x00...\x00\x00\x00.\x00\x00\x00\x00''') - cls.test3 = FS('''....1234aaaa.....''') - cls.test4 = FS( - b'''\x00\x00\x00\x00h\x00i\x00 \x00m\x00y\x00 
\x00n\x00a\x00m\x00e\x00\x00\x00\xef\x00\x00\x00\x00\x00....''') - cls.test5 = FS( - b'\xd8\xf2d\x00P\xf3d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00CryptDllVerifyEncodedSignature\x00\x00') - cls.test6 = FS( - b'''edrtfguyiopserdtyuhijo45567890oguiy4e65rtiu\xf1\x07\x08\x09\x00''') - # - cls.test8 = FS( - b'C\x00:\x00\\\x00W\x00i\x00n\x00d\x00o\x00w\x00s\x00\\\x00S\x00y\x00s\x00t\x00e\x00m\x003\x002\x00\\\x00D\x00r\x00i\x00v\x00e\x00r\x00S\x00t\x00o\x00r\x00e\x00\x00\x00\xf1/\xa6\x08\x00\x00\x00\x88,\x00\x00\x00C\x00:\x00\\\x00P\x00r\x00o\x00g\x00r\x00a\x00m\x00 \x00F\x00i\x00l\x00e\x00s\x00 \x00(\x00x\x008\x006\x00)\x00\x00\x00P\x00u\x00T\x00Y\x00') - - # new test from real case zeus.856 @0xb2e38 - real = b'\xc81\x0b\x00\xa8*\x0b\x00\x01\x00\x00\x00\x00\x00\x00\x00f \x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\xe0\xa9`\x9dz3\xd0\x11\xbd\x88\x00\x00\xc0\x82\xe6\x9a\xed\x03\x00\x00\x01\x00\x00\x00\xc8\xfc\xbe\x02p\x0c\x00\x00\x08\x00\x00\x00\x1d\x00\x02\x00L\xfd\xbe\x02\xd8\x91\x1b\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x10\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00R\x00S\x00V\x00P\x00 \x00T\x00C\x00P\x00 \x00S\x00e\x00r\x00v\x00i\x00c\x00e\x00 \x00P\x00r\x00o\x00v\x00i\x00d\x00e\x00r\x00\x00\x00f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xab\xe9\x90|\xf2\x94\x80|\x00P\xfd\x7f\x00\x00\x1c\x00\x08\x00\x00\x00\x00\x00\x00\x00t\xfc\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x88\xb0\xd2\x01\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|\x08\x06\x1c\x00m\x05\x91|h^\xd0\x01\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\xc3\x00\x01\x00\x00\x000\x02\x1c\x00\x02\x00\x00\x00\x90\xb0\xd2\x01\x03\x00\x00\x00\x02\x00\x00\x00h^\xd0\x010\x02\x1c\x00\xd8>\xd4\x010\xf0\xfc\x00\xb8\x02\x1c\x00\xe8?\xd4\x01\xd8\x01\x1c\x00\x00\x00\x00\x00\x10\x00\x00\x00\xe8?\xd4\x01\x0c\x00\x00\x00\x05\x00\x00\x00\xf0\x06\x91|\xe0\x01\x1c\x00\x18\x00\x00\x00\xe0>\xd4\x01\x00\x00\x1c\x00\x01\x00\x00\x00\x08\x00\x00\x00\xe0\x01\x1c\x00@\x00\x00\x00\xf0?\xd4\x01\xa8\x04\x1c\x00\x00\x00\x1c\x00Om\x01\x01\x84^\xd0\x01`\x00\x00\x00\xb8\x02\x1c\x00\x00\x00\x00\x00\xd8>\xd4\x01\x88\xfc\xbe\x02F\x0f\x91|\r\x00\x00\x00\xd8>\xd4\x01\x00\x00\x1c\x00\x10<\xd4\x01\x00\x00\x00\x00\\\xfd\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|\x08\x06\x1c\x00m\x05\x91|`\xab\xf0\x00\x00\x00\x00\x00\xec<\xca\x02\x00\x00\xc3\x00\x0c\x00\x00\x00\x10<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00\x00\x00\x00\x00\x18<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00(\xfd\xbe\x02\xa8\x04\x1c\x00\xd0\x0c\x00\x00@\x00\x00\x00\x03\x00\x00\x00\x18<\xd4\x01\xa8\x04\x1c\x00`\xab\xf0\x00\xc8\x02\x00\x00\xec<\xca\x02\x0c\x00\x0e\x00') - - not_smallints = [b'\xfa\xff\xfb\xff', b'\x01\xff\xff\x03', b'\x02\xff\x42\xff', - b'\x01\x00\x00\x01', b'\x00\x12\x01\xaa', b'\x00\xad\x00\x42', - b'\x00\x41\x00\x41', b'\x41\x00\x41\x00'] - for bytes in not_smallints: - fields = self.ints.make_fields(FS(bytes), 0, 4) - self.assertEqual(len([_ for _ in fields]), 0) - - def test_ascii(self): - fields = self.ascii.make_fields(self.test1, 0, len(self.test1)) - self.assertEqual(len([_ for _ in fields]), 3) - - fields = self.ascii.make_fields(self.test1, 8, len(self.test1) - 8) - self.assertEqual(len([_ for _ in fields]), 2) - - fields = self.ascii.make_fields(self.test2, 0, 
len(self.test2)) - self.assertEqual(len([_ for _ in fields]), 3) - - fields = self.ascii.make_fields(self.test3, 0, len(self.test3)) - self.assertEqual(len([_ for _ in fields]), 1) - - fields = self.ascii.make_fields(self.test4, 0, len(self.test4)) - self.assertEqual(len([_ for _ in fields]), 1) - - fields = self.ascii.make_fields(self.test3, 4, 12) - self.assertEqual(len([_ for _ in fields]), 1) - - fields = self.ascii.make_fields(self.test5, 0, len(self.test5)) - self.assertEqual(len([_ for _ in fields]), 1) - - fields = self.ascii.make_fields(self.test6, 0, len(self.test6)) - self.assertEqual(len([_ for _ in fields]), 1) - - fields = self.ascii.make_fields(self.test8, 0, len(self.test8)) - self.assertEqual(len([_ for _ in fields]), 0) - - -class TestDSA(unittest.TestCase): - - @classmethod - def setUpClass(cls): - # context.get_context('test/src/test-ctypes3.dump') - cls.context = None - cls.memory_handler = dump_loader.load(putty_7124_win7.dumpname) - cls.putty7124 = context.get_context_for_address(cls.memory_handler, putty_7124_win7.known_heaps[0][0]) - cls.dsa = dsa.FieldReverser(cls.putty7124.memory_handler) - cls.memory_handler = cls.putty7124.memory_handler - - def setUp(self): - pass - - def tearDown(self): - pass - - @unittest.expectedFailure # 'utf16 should start on aligned byte' - def test_utf_16_le_null_terminated(self): - - # struct_682638 in putty.7124.dump - vaddr = 0x682638 - size = 184 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertEqual(len([_ for _ in fields]), 5) # TODO should be 6 fields lllttp - self.assertEqual(fields[2].field_type, fieldtypes.STRING16) - self.assertTrue(fields[2].is_string()) - # TODO fields[3] should start at offset 12, not 10. - self.assertEqual(fields[3].field_type, fieldtypes.STRING16) - self.assertTrue(fields[3].is_string()) - # print f - - def test_utf_16_le_non_null_terminated(self): - """ non-null terminated """ - # struct_691ed8 in putty.7124.dump - vaddr = 0x691ed8 - size = 256 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertEqual(len([_ for _ in fields]), 2) - self.assertEqual(fields[1].field_type, fieldtypes.STRING16) - self.assertTrue(fields[1].is_string()) - - def test_ascii_null_terminated_2(self): - """ null terminated """ - # struct_64f328 in putty.7124.dump - vaddr = 0x64f328 - size = 72 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertEqual(len([_ for _ in fields]), 5) - self.assertEqual(fields[3].field_type, fieldtypes.STRINGNULL) - self.assertTrue(fields[3].is_string()) - - def test_utf_16_le_null_terminated_3(self): - ''' null terminated ''' - # in putty.7124.dump - vaddr = 0x657488 - size = 88 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertEqual(len([_ for _ in fields]), 2) # should be 3 Lt0? 
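The string tests in this block all probe the same signal: ASCII-range UTF-16-LE text appears in memory as pairs of a printable byte followed by 0x00 (the "h\x00i\x00 \x00m\x00y\x00..." fixtures above). A toy scanner showing the shape of that heuristic, for Python 3 bytes; this is not haystack's actual dsa implementation:

def utf16le_run(data, offset=0):
    # return the byte length of the printable UTF-16-LE run at offset
    end = offset
    while end + 1 < len(data) and 32 <= data[end] < 127 and data[end + 1] == 0:
        end += 2
    return end - offset

buf = b'h\x00i\x00 \x00m\x00o\x00m\x00\x00\x00\xef\xbe'
n = utf16le_run(buf)
print(n, buf[:n].decode('utf-16-le'))   # 12 hi mom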
- self.assertEqual(fields[0].field_type, fieldtypes.STRING16) - self.assertTrue(fields[0].is_string()) - - def test_big_block(self): - ''' null terminated ''' - # in putty.7124.dump - vaddr = 0x63d4c8 # + 1968 - size = 4088 # 128 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertLess(len([_ for _ in fields]), 879) - #self.assertEqual( fields[35].field_type.name, fieldtypes.STRINGNULL) - #self.assertTrue( fields[35].isString()) - strfields = [f for f in st.get_fields() if f.is_string()] - # for f in strfields: - # print f.toString(), - self.assertGreater(len(strfields), 30) - - def test_uuid(self): - ''' null terminated ''' - # in putty.7124.dump - vaddr = 0x63aa68 - size = 120 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertEqual(len([_ for _ in fields]), 3) - self.assertEqual(fields[1].field_type, fieldtypes.STRING16) - self.assertTrue(fields[1].is_string()) - - pass - - def test_big_block_2(self): - # in putty.7124.dump - # its garbage anyway - vaddr = 0x675b30 - size = 8184 - st = structure.AnonymousRecord(self.memory_handler, vaddr, size) - self.dsa.reverse_record(self.context, st) - # print repr(st.bytes) - log.debug(st.to_string()) - fields = st.get_fields() - self.assertLess(len([_ for _ in fields]), 890) - #self.assertEqual( fields[35].field_type.name, fieldtypes.STRINGNULL) - #self.assertTrue( fields[35].isString()) - fields = [f for f in st.get_fields() if f.is_string()] - # for f in fields: - # print f.toString(), - - -class TestFieldAnalyserReal(unittest.TestCase): - - @classmethod - def setUpClass(cls): - from haystack import dump_loader - cls.memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - cls.context = context.get_context_for_address(cls.memory_handler, 0x90000) - cls.target = cls.memory_handler.get_target_platform() - cls.zeroes = dsa.ZeroFields(cls.memory_handler) - cls.utf16 = dsa.UTF16Fields(cls.memory_handler) - cls.ascii = dsa.PrintableAsciiFields(cls.memory_handler) - cls.ints = dsa.IntegerFields(cls.memory_handler) - - # new test from real case zeus.856 @0xb2e38 - cls.real = b'\xc81\x0b\x00\xa8*\x0b\x00\x01\x00\x00\x00\x00\x00\x00\x00f \x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\xe0\xa9`\x9dz3\xd0\x11\xbd\x88\x00\x00\xc0\x82\xe6\x9a\xed\x03\x00\x00\x01\x00\x00\x00\xc8\xfc\xbe\x02p\x0c\x00\x00\x08\x00\x00\x00\x1d\x00\x02\x00L\xfd\xbe\x02\xd8\x91\x1b\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x10\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00R\x00S\x00V\x00P\x00 \x00T\x00C\x00P\x00 \x00S\x00e\x00r\x00v\x00i\x00c\x00e\x00 
\x00P\x00r\x00o\x00v\x00i\x00d\x00e\x00r\x00\x00\x00f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xab\xe9\x90|\xf2\x94\x80|\x00P\xfd\x7f\x00\x00\x1c\x00\x08\x00\x00\x00\x00\x00\x00\x00t\xfc\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x88\xb0\xd2\x01\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|\x08\x06\x1c\x00m\x05\x91|h^\xd0\x01\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\xc3\x00\x01\x00\x00\x000\x02\x1c\x00\x02\x00\x00\x00\x90\xb0\xd2\x01\x03\x00\x00\x00\x02\x00\x00\x00h^\xd0\x010\x02\x1c\x00\xd8>\xd4\x010\xf0\xfc\x00\xb8\x02\x1c\x00\xe8?\xd4\x01\xd8\x01\x1c\x00\x00\x00\x00\x00\x10\x00\x00\x00\xe8?\xd4\x01\x0c\x00\x00\x00\x05\x00\x00\x00\xf0\x06\x91|\xe0\x01\x1c\x00\x18\x00\x00\x00\xe0>\xd4\x01\x00\x00\x1c\x00\x01\x00\x00\x00\x08\x00\x00\x00\xe0\x01\x1c\x00@\x00\x00\x00\xf0?\xd4\x01\xa8\x04\x1c\x00\x00\x00\x1c\x00Om\x01\x01\x84^\xd0\x01`\x00\x00\x00\xb8\x02\x1c\x00\x00\x00\x00\x00\xd8>\xd4\x01\x88\xfc\xbe\x02F\x0f\x91|\r\x00\x00\x00\xd8>\xd4\x01\x00\x00\x1c\x00\x10<\xd4\x01\x00\x00\x00\x00\\\xfd\xbe\x02\\\r\x91|\x00\x00\x1c\x00\x91\x0e\x91|\x08\x06\x1c\x00m\x05\x91|`\xab\xf0\x00\x00\x00\x00\x00\xec<\xca\x02\x00\x00\xc3\x00\x0c\x00\x00\x00\x10<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00\x00\x00\x00\x00\x18<\xd4\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x0c\x00\x00(\xfd\xbe\x02\xa8\x04\x1c\x00\xd0\x0c\x00\x00@\x00\x00\x00\x03\x00\x00\x00\x18<\xd4\x01\xa8\x04\x1c\x00`\xab\xf0\x00\xc8\x02\x00\x00\xec<\xca\x02\x0c\x00\x0e\x00 node2 <-> ... <-> node255 """ - word_size = self.memory_handler.get_target_platform().get_word_size() - process_context = self.memory_handler.get_reverse_context() - - start_addr = self.offsets['start_list'][0] - mid_addr = self.offsets['mid_list'][0] - end_addr = self.offsets['end_list'][0] - - heap_context = process_context.get_context_for_address(start_addr) - self.assertIsNotNone(heap_context) - - start = heap_context.get_record_for_address(start_addr) - mid = heap_context.get_record_for_address(mid_addr) - end = heap_context.get_record_for_address(end_addr) - # print mid.to_string() - # reverse the list - self.dllr.reverse() - print(mid.to_string()) - size = len(mid) - # there is a list for this size - self.assertIn(size, self.dllr.lists) - # is the offset is the same for all item in the list ? - - ## self.dllr.debug_lists() - log.debug("start: %x size: %d", start_addr, self.sizes['start_list']) - log.debug("end: %x size: %d", end_addr, self.sizes['end_list']) - - # the second field should be "ptr_8" - self.assertEqual(start.get_fields()[0], start.get_field('zerroes_0')) - self.assertEqual(start.get_fields()[1], start.get_field('ptr_8')) - self.assertEqual(start.get_fields()[2], start.get_field('zerroes_16')) - self.assertEqual(mid.get_fields()[0], mid.get_field('small_int_0')) - self.assertEqual(mid.get_fields()[1], mid.get_field('ptr_8')) - self.assertEqual(mid.get_fields()[2], mid.get_field('ptr_16')) - self.assertEqual(end.get_fields()[0], end.get_field('small_int_0')) - self.assertEqual(end.get_fields()[1], end.get_field('zerroes_8')) - self.assertEqual(end.get_fields()[2], end.get_field('ptr_16')) - - # while the item is of size 32, we have padding. 
making 40 - members_list = [l for l in self.dllr.lists[40][8] if len(l) == 255][0] - self.assertEqual(len(members_list), 255) - # check head - self.assertEqual(members_list[0], start_addr) - self.assertEqual(members_list[-1], end_addr) - # check that our list order is correct - self.assertEqual(start.address, members_list[0]) - self.assertEqual(mid.address, members_list[127]) - self.assertEqual(end.address, members_list[254]) - - # reverse the types for the list of items 40, at offset 8 - offset = 8 - self.dllr.rename_record_type(members_list, offset) - # print mid.to_string() - - # now the second field should be "entry" LIST ENTRY type with 2 subfields. - one_ptr = start.get_fields()[1] - self.assertEqual(start.get_fields()[1], start.get_field('list')) - self.assertEqual(one_ptr.name, 'list') - self.assertEqual(mid.get_fields()[1], mid.get_field('list')) - self.assertEqual(end.get_fields()[1], end.get_field('list')) - - # but also types should be the same across list members - self.assertEqual(start.get_fields()[1], end.get_field('list')) - self.assertEqual(start.record_type, end.record_type) - self.assertEqual(mid.record_type, end.record_type) - # and get_fields produce different list of the same fields - self.assertEqual(start.get_fields(), end.get_fields()) - - # get the pointer value and iterate over each item - item_list_entry_addr = start_addr+offset - for i in range(1, 255): - # get the pointee record - next_item = heap_context.get_record_for_address(item_list_entry_addr-offset) - # still of the same size, record_type and such - self.assertEqual(len(next_item), size) - self.assertEqual(start.record_type, next_item.record_type) - self.assertEqual(next_item.get_fields()[1], next_item.get_field('list')) - # anyway, start->list has 2 members - item_list_entry = next_item.get_field('list') - self.assertEqual(len(item_list_entry.get_fields()), 2) - # check the names - self.assertEqual(item_list_entry.get_fields()[0], item_list_entry.get_field('Next')) - self.assertEqual(item_list_entry.get_fields()[1], item_list_entry.get_field('Back')) - # get the next list item - next_one = item_list_entry.get_field('Next') - item_value = item_list_entry.get_value_for_field(next_one, word_size) - #print i, hex(item_value-offset) - self.assertEqual(item_value-offset, members_list[i]) - item_list_entry_addr = item_value - - # we should be at last item - self.assertEqual(item_list_entry_addr-offset, end.address) - - def test_double_iter_with_head(self): - """// test head -> node1 <-> node2 <-> ... <-> node16""" - word_size = self.memory_handler.get_target_platform().get_word_size() - process_context = self.memory_handler.get_reverse_context() - - head_addr = self.offsets['head_start_list'][0] - first_addr = self.offsets['head_first_item'][0] - last_addr = self.offsets['head_last_item'][0] - - heap_context = process_context.get_context_for_address(head_addr) - self.assertIsNotNone(heap_context) - - head = heap_context.get_record_for_address(head_addr) - first = heap_context.get_record_for_address(first_addr) - last = heap_context.get_record_for_address(last_addr) - # print mid.to_string() - # reverse the list - self.dllr.reverse() - # print first.to_string() - # print last.to_string() - size = len(last) - # there is a list for this size - self.assertIn(size, self.dllr.lists) - # is the offset is the same for all item in the list ? 
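The 255-node walk above rests on one invariant: each record embeds its LIST_ENTRY at +offset, so every Next field stores the address of the next record's embedded entry, and record_address = next_value - offset (the container_of arithmetic). A tiny model with made-up addresses, assuming offset 8 as in the test:

OFFSET = 8   # the embedded 'list' field sits 8 bytes into each record

# record address -> value held in that record's Next field; each Next
# points at the *embedded entry* of the following record, closing a cycle
NEXT = {0x1000: 0x2008, 0x2000: 0x3008, 0x3000: 0x1008}

def walk(record_addr, steps):
    visited = []
    for _ in range(steps):
        visited.append(record_addr)
        record_addr = NEXT[record_addr] - OFFSET   # entry -> enclosing record
    return visited

print([hex(a) for a in walk(0x1000, 4)])
# ['0x1000', '0x2000', '0x3000', '0x1000'] -- the cycle closes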
- - ## self.dllr.debug_lists() - log.debug("head: %x size: %d", head_addr, self.sizes['head_start_list']) - log.debug("first: %x size: %d", first_addr, self.sizes['head_first_item']) - log.debug("end: %x size: %d", last_addr, self.sizes['head_last_item']) - - # reverse the types for the list of items 40, at offset 8 - offset = 8 - # while the item is of size 32, we have padding. making 40 - # we need the list with first_addr in it - members_list = [l for l in self.dllr.lists[40][8] if first_addr in l][0] - self.assertEqual(len(members_list), 16) - # check head - self.assertEqual(members_list[0], first_addr) - self.assertEqual(members_list[-1], last_addr) - # the head is not in the list, because head as a different size - self.assertNotEqual(len(head), len(first)) - self.assertNotIn(head_addr, members_list) - - # reverse - self.dllr.rename_record_type(members_list, offset) - - # get the pointer value and iterate over each item - item_list_entry_addr = first_addr+offset - for i in range(1, 16): - # get the pointee record - next_item = heap_context.get_record_for_address(item_list_entry_addr-offset) - # still of the same size, record_type and such - self.assertEqual(len(next_item), size) - self.assertEqual(first.record_type, next_item.record_type) - self.assertEqual(next_item.get_fields()[1], next_item.get_field('list')) - # anyway, start->list has 2 members - item_list_entry = next_item.get_field('list') - self.assertEqual(len(item_list_entry.get_fields()), 2) - # check the names - self.assertEqual(item_list_entry.get_fields()[0], item_list_entry.get_field('Next')) - self.assertEqual(item_list_entry.get_fields()[1], item_list_entry.get_field('Back')) - # get the next list item - next_one = item_list_entry.get_field('Next') - item_value = item_list_entry.get_value_for_field(next_one, word_size) - #print i, hex(item_value-offset) - self.assertEqual(item_value-offset, members_list[i]) - item_list_entry_addr = item_value - - # we should be at last item - self.assertEqual(item_list_entry_addr-offset, last.address) - - def test_double_iter_loop_with_head(self): - """// test head <-> node1 <-> node2 <-> ... <-> node16 <-> head <-> node1 <-> ....""" - word_size = self.memory_handler.get_target_platform().get_word_size() - process_context = self.memory_handler.get_reverse_context() - - head_addr = self.offsets['head_loop_start_list'][0] - first_addr = self.offsets['head_loop_first_item'][0] - last_addr = self.offsets['head_loop_last_item'][0] - - heap_context = process_context.get_context_for_address(head_addr) - self.assertIsNotNone(heap_context) - - head = heap_context.get_record_for_address(head_addr) - first = heap_context.get_record_for_address(first_addr) - last = heap_context.get_record_for_address(last_addr) - # reverse the list - self.dllr.reverse() - size = len(last) - # there is a list for this size - self.assertIn(size, self.dllr.lists) - log.debug("head: %x size: %d", head_addr, self.sizes['head_loop_start_list']) - log.debug("first: %x size: %d", first_addr, self.sizes['head_loop_first_item']) - log.debug("end: %x size: %d", last_addr, self.sizes['head_loop_last_item']) - - # reverse the types for the list of items 40, at offset 8 - offset = 8 - # while the item is of size 32, we have padding. 
making 40 - # we need the list with first_addr in it - members_list = [l for l in self.dllr.lists[40][8] if first_addr in l][0] - self.assertEqual(len(members_list), 16) - # check first - self.assertEqual(members_list[0], first_addr) - self.assertEqual(members_list[-1], last_addr) - # the head is not in the list, because head as a different size - self.assertNotEqual(len(head), len(first)) - self.assertNotIn(head_addr, members_list) - # etc... - - def test_double_iter_loop_with_head_insertion(self): - """// test head -> node1 <-> node2 <-> ... <-> node16 <-> node1 <-> node2 ...""" - process_context = self.memory_handler.get_reverse_context() - - head_addr = self.offsets['loop_head_insert'][0] - first_addr = self.offsets['loop_first_item'][0] - last_addr = self.offsets['loop_last_item'][0] - - heap_context = process_context.get_context_for_address(head_addr) - self.assertIsNotNone(heap_context) - - head = heap_context.get_record_for_address(head_addr) - first = heap_context.get_record_for_address(first_addr) - last = heap_context.get_record_for_address(last_addr) - # reverse the list - self.dllr.reverse() - size = len(last) - # there is a list for this size - self.assertIn(size, self.dllr.lists) - - self.dllr.debug_lists() - log.debug("head: %x size: %d", head_addr, self.sizes['loop_head_insert']) - log.debug("first: %x size: %d", first_addr, self.sizes['loop_first_item']) - log.debug("end: %x size: %d", last_addr, self.sizes['loop_last_item']) - - # reverse the types for the list of items 40, at offset 8 - offset = 8 - # while the item is of size 32, we have padding. making 40 - # we need the list with first_addr in it - members_list = [l for l in self.dllr.lists[40][8] if first_addr in l][0] - self.assertEqual(len(members_list), 16) - # check that first is in list. - # but first is not nessarly [0] due to full loop - ind_first = members_list.index(first_addr) - ind_last = members_list.index(last_addr) - list_size = len(members_list) - # but last is before first, for sure. - self.assertEqual(list_size % ind_first, list_size % (ind_last+1)) - # the head is not in the list, because head as a different size - self.assertNotEqual(len(head), len(first)) - self.assertNotIn(head_addr, members_list) - # etc... 
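The modular check in the insertion test above is fragile on two counts: "list_size % ind_first" raises ZeroDivisionError whenever first happens to land at index 0, and the equality can hold for non-adjacent indices (16 % 3 == 16 % 5 == 1). The invariant the test seems to want, "last immediately precedes first in the cycle", can be expressed directly:

def last_precedes_first(members, last_addr, first_addr):
    # traversal may enter a full loop anywhere, so 'first' need not be
    # members[0]; the stable fact is adjacency modulo the list length
    return (members.index(last_addr) + 1) % len(members) == members.index(first_addr)

members = ['n14', 'n15', 'n16', 'n1', 'n2']        # traversal entered the loop at n14
print(last_precedes_first(members, 'n16', 'n1'))   # True
print(last_precedes_first(members, 'n15', 'n1'))   # False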
- - -class TestStructureSizes(SrcTests): - - @classmethod - def setUpClass(cls): - pass - #sys.path.append('test/src/') - #import ctypes3 - # - #node = ctypes3.struct_Node - #node._expectedValues_ = dict( - # [('val1', [0xdeadbeef]), ('ptr2', [constraints.NotNull])]) - #test3 = ctypes3.struct_test3 - #test3._expectedValues_ = dict([ - # ('val1', [0xdeadbeef]), - # ('val1b', [0xdeadbeef]), - # ('val2', [0x10101010]), - # ('val2b', [0x10101010]), - # ('me', [constraints.NotNull])]) - - def setUp(self): - # os.chdir() - self.memory_handler = dump_loader.load('test/src/test-ctypes3.32.dump') - self._load_offsets_values(self.memory_handler.get_name()) - finder = self.memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - self.context = context.get_context_for_address(self.memory_handler, walkers[0]) - ## - self.dsa = dsa.FieldReverser(self.memory_handler) - - def tearDown(self): - self.memory_handler.reset_mappings() - self.context = None - - @unittest.skip('DEBUGging the other one') - def test_sizes(self): - ctypes = self.context.memory_handler.get_target_platform().get_target_ctypes() - structs = self.context.listStructures() - sizes = sorted(set([len(s) for s in structs])) - ctypes3 = self.context.memory_handler.get_model().import_module('test.src.ctypes3_32') - for st in structs: # [1:2]: - self.dsa.reverse_record(self.context, st) - #print st.toString() - # print repr(self.context.heap.readBytes(st._vaddr, len(st))) - - # there are only two struct types - # the free chunks is not listed - self.assertEqual(len(sizes), 2) - self.assertEqual(len(structs), 6) - - # our compiler put a padding at the end of struct_Node - # struct_node should be 8, no padding, but its 12. - self.assertEqual(sizes, [12,20]) - - #st = ctypes3.Node() - # print st.toString(), st._expectedValues_ - - self.assertEqual(ctypes.sizeof(ctypes.c_void_p),4) - self.assertEqual(ctypes3.struct_test3.me.size,4) - self.assertEqual(sizes[1], ctypes.sizeof(ctypes3.struct_test3)) - - # our compiler put a padding at the end of struct_Node - # struct_node should be 8, no padding, but its 12. 
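That 8-versus-12 gap, asserted just below, is what tail padding looks like from a memory dump: a struct's size is rounded up to a multiple of its strictest member alignment, so the recovered record is larger than the sum of its fields. ctypes reproduces the ABI rule (figures assume common x86 ABIs):

import ctypes

class Packed(ctypes.Structure):
    _pack_ = 1   # opt out of padding: 4 + 4 = 8 bytes
    _fields_ = [('val1', ctypes.c_uint32), ('ptr2', ctypes.c_uint32)]

class Padded(ctypes.Structure):
    _fields_ = [('big', ctypes.c_uint64), ('tag', ctypes.c_uint8)]

print(ctypes.sizeof(Packed))   # 8
print(ctypes.sizeof(Padded))   # 16 on x86-64; 12 on i386, where 8-byte
                               # integers only need 4-byte alignment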
- self.assertNotEqual( - sizes[0], - ctypes.sizeof( - ctypes3.struct_Node), - 'There should be a 4 bytes padding here') - self.assertEqual( - sizes[0] - 4, - ctypes.sizeof( - ctypes3.struct_Node), - 'There should be a 4 bytes padding here') - - -class TestFullReverse(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.dumpname = 'test/dumps/ssh/ssh.1' - #config.remove_cache_folder(cls.dumpname) - cls.memory_handler = dump_loader.load(ssh_1_i386_linux.dumpname) - return - - @classmethod - def tearDownClass(cls): - #config.remove_cache_folder(cls.dumpname) - cls.memory_handler = None - return - - def test_reverse_heap(self): - log.info('START test test_reverseInstances') - ctx = api.reverse_heap(self.memory_handler, ssh_1_i386_linux.known_heaps[0][0]) - - memory_handler = self.memory_handler - finder = memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - - self.assertEqual(len(walkers), len(ssh_1_i386_linux.known_heaps)) - #pointers - self.assertEqual(2236, len(ctx.listPointerValueInHeap())) - self.assertEqual(2568, len(ctx.list_allocations_addresses())) - self.assertEqual(2568, len(ctx._list_records())) - self.assertEqual(2568, ctx.get_record_count()) - self.assertIn('ssh.1/cache/b84e0000.ctx', ctx.get_filename_cache_context()) - self.assertIn('ssh.1/cache/b84e0000.headers_values.py', ctx.get_filename_cache_headers()) - self.assertIn('ssh.1/cache/b84e0000.graph.gexf', ctx.get_filename_cache_graph()) - self.assertIn('ssh.1/cache/structs', ctx.get_folder_cache_structures()) - - return - - -@unittest.skip -class TestReverseZeus(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.dumpname = zeus_856_svchost_exe.dumpname - cls.memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - cls.process_context = cls.memory_handler.get_reverse_context() - cls.process_context.create_record_cache_folder() - ## - cls.offset = zeus_856_svchost_exe.known_records[0][0] - cls._context = context.get_context_for_address(cls.memory_handler, cls.offset) - api.reverse_instances(cls.memory_handler) - return - - @classmethod - def tearDownClass(cls): - #config.remove_cache_folder(cls.dumpname) - cls.memory_handler = None - cls._context = None - return - - def _v(self, record): - if True: - return record.get_signature_text() - else: - return record.to_string() - - def test_reverse_heap(self): - #ctx = reversers.reverse_heap(self.memory_handler, zeus_856_svchost_exe.known_heaps[0][0]) - - struct_d = self._context.get_record_for_address(self.offset) - struct_d.reset() - - sig_1 = struct_d.get_signature_text() - # print '1.', self._v(struct_d) - #self.assertEqual(sig_1, 'P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z8272P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z180u4z176') - - # decode bytes contents to find basic types. 
- fr = dsa.FieldReverser(self.memory_handler) - fr.reverse() - sig_2 = struct_d.get_signature_text() - # print '2.', self._v(struct_d) - # no double linked list in here - #self.assertEqual(sig_2, 'P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z8272P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z180u4z176') - # print struct_d.to_string() - #import code - #code.interact(local=locals()) - - # try to find some logical constructs. - doublelink = reversers.DoubleLinkedListReverser(self.memory_handler) - doublelink.reverse() - #self.assertEqual(doublelink.found, 12) - sig_3 = struct_d.get_signature_text() - # print '3.', self._v(struct_d) - #self.assertEqual(sig_3, 'P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4P4P4P4P4P4P4i4z4i4i4z8P4P4z8P4i4u16z4i4z4P4P4P4P4z64P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z8272P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z176P4u4z180u4z176') - # print struct_d.to_string() - #import code - #code.interact(local=locals()) - - # identify pointer relation between allocators - pfr = pointertypes.PointerFieldReverser(self.memory_handler) - pfr.reverse() - sig_4 = struct_d.get_signature_text() - # print '4.', self._v(struct_d) - # print struct_d.to_string() - #import code - #code.interact(local=locals()) - - # aggregate field of same type in an array - #afr = reversers.ArrayFieldsReverser(self._context) - #afr.reverse() - #sig_5 = struct_d.get_signature_text() - # print '5.', self._v(struct_d) - # print struct_d.to_string() - #import code - #code.interact(local=locals()) - - tr = signature.TypeReverser(self.memory_handler) - tr.reverse() - sig_6 = struct_d.get_signature_text() - # print '6.', self._v(struct_d) - # print "tr._similarities", tr._similarities - for a,b in tr._similarities: - # print self._context.get_record_for_address(a).to_string() - # print self._context.get_record_for_address(b).to_string() - #import code - #code.interact(local=locals()) - pass - - -@unittest.skip -class TestReversers(SrcTests): - - def setUp(self): - self.memory_handler = dump_loader.load('test/src/test-ctypes5.64.dump') - self._load_offsets_values(self.memory_handler.get_name()) - sys.path.append('test/src/') - - self.offset = self.offsets['struct_d'][0] - self.m = self.memory_handler.get_mapping_for_address(self.offset) - - self._context = context.get_context_for_address(self.memory_handler, self.offset) - - # reverse the heap - if not os.access(config.get_record_cache_folder_name(self._context.dumpname), os.F_OK): - os.mkdir(config.get_record_cache_folder_name(self._context.dumpname)) - - log.info("[+] Cache created in %s", config.get_cache_folder_name(self._context.dumpname)) - - def tearDown(self): - self.memory_handler.reset_mappings() - self.memory_handler = None - self.m = None - self.usual = None - sys.path.remove('test/src/') - - def _v(self, record): - if True: - return record.get_signature_text() - else: - return record.to_string() - - def test_reversers(self): - - # order of resolution should be - # FieldReverser - # DoubleLinkedListReverser - # PointerFieldReverser - # TypeReverser - - struct_d = 
self._context.get_record_for_address(self.offset) - sig_1 = struct_d.get_signature_text() - # print '1.', self._v(struct_d) - - # try to find some logical constructs. - doublelink = reversers.DoubleLinkedListReverser(self.memory_handler) - doublelink.reverse() - sig_2 = struct_d.get_signature_text() - # print '2.', self._v(struct_d) - # no double linked list in here - self.assertEqual('', sig_2) - - # decode bytes contents to find basic types. - fr = dsa.FieldReverser(self.memory_handler) - fr.reverse() - sig_3 = struct_d.get_signature_text() - # print '3.', self._v(struct_d) - #self.assertEqual(sig_3, 'P8P8P8z24i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8P8P8P8P8P8P8P8P8P8P8P8P8u40P8P8P8P8P8P8P8P8P8P8i8P8T14u2z16P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z16P8') - - # identify pointer relation between allocators - pfr = pointertypes.PointerFieldReverser(self.memory_handler) - pfr.reverse() - sig_4 = struct_d.get_signature_text() - # print '4.', self._v(struct_d) - - #logging.getLogger("reversers").setLevel(logging.DEBUG) - - # aggregate field of same type in an array - ## FIXME very very long. - #afr = reversers.ArrayFieldsReverser(self._context) - #afr.reverse() - #sig_5 = struct_d.get_signature_text() - # print '5.', self._v(struct_d) - - tr = signature.TypeReverser(self.memory_handler) - tr.reverse() - sig_6 = struct_d.get_signature_text() - # print '6.', self._v(struct_d) - # print "tr._similarities", tr._similarities - for a,b in tr._similarities: - # print self._context.get_record_for_address(a).to_string() - # print self._context.get_record_for_address(b).to_string() - #import code - #code.interact(local=locals()) - pass - - #self.assertNotEqual(sig_4, sig_5) - #self.assertEqual(sig_4, 'P8P8P8z24i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z40i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8i8z8P8P8P8P8P8P8P8P8P8P8P8P8u40P8P8P8P8P8P8P8P8P8P8i8P8T14u2z16P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z8P8z16P8') - #self.assertEqual(sig_5, 'a24z24i8a640z8a128a96u40a80i8P8T14u2z16P8a304z16P8') - # print 'struct_d 0x%x' % self.offset - - # print struct_d.to_string() - #import code - #code.interact(local=locals()) - - -class TestGraphReverser(SrcTests): - - @classmethod - def setUpClass(cls): - cls.dumpname = zeus_856_svchost_exe.dumpname - #config.remove_cache_folder(cls.dumpname) - cls.memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - ## - cls.offset = zeus_856_svchost_exe.known_records[0][0] - cls._context = context.get_context_for_address(cls.memory_handler, cls.offset) - return - - @classmethod - def tearDownClass(cls): - #config.remove_cache_folder(cls.dumpname) - cls.memory_handler = None - cls._context = None - return - - def _v(self, record): - if True: - return record.get_signature_text() - else: - return record.to_string() - - def test_graph(self): - log.debug('Reversing PointerGraph') - ptrgraph = reversers.PointerGraphReverser(self.memory_handler) - ptrgraph.reverse() - - -class TestEnrichedPointerAnalyserReal(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - cls._context = context.get_context_for_address(cls.memory_handler, 0x90000) - - @classmethod - def tearDownClass(cls): - cls.memory_handler = None - cls._context = None - - def test_doublelink(self): - # reverse first with dsa - _record = 
self._context.get_record_for_address(0xccd00) - _record.reset() - - revdsa = dsa.FieldReverser(self.memory_handler) - revdsa.reverse() - - rev = reversers.DoubleLinkedListReverser(self.memory_handler) - # interesting records - # SIG:T4i4P4P4i4z12 - # struct_bbf78 struct_a6518 struct_cca28 - # list goes from 0xccd28, 0xccd00 to 0x98268 - #_record = self._context.get_record_for_address(0xccd28) - _record = self._context.get_record_for_address(0xccd00) - print(_record.to_string()) - #_record.set_reverse_level(9) - ## - rev.reverse_record(self._context, _record) - - print(_record.to_string()) - n1 = self._context.get_record_for_address(0x000ccae8) - print(n1.to_string()) - tail = self._context.get_record_for_address(0x98268) - print(tail.to_string()) - expected = [0xccd28,0xccd00,0xccae8,0xcca50,0xcca28,0xcc428,0xc6878,0xdcbc8,0xdcb40,0xcd300,0xbbf78,0xbefd8,0xbecd8,0xbc560,0xbbee0,0xbbda8,0xbbb38,0xbbae0,0xa6518,0xb5d00,0xb5cd8,0xb5cb0,0xb5b70,0xb1aa8,0xa20b8,0x9e2f8,0xa1920,0xa1838,0x98268] - size_records = len(tail) - # offset = 8 - offset = 8 - rev.rename_all_lists() - - self.assertEqual(rev.lists[size_records][offset][0], expected) - - # rename all lists - for size, offset_lists in rev.lists.items(): - for offset, multiple_lists in offset_lists.items(): - for members_list in multiple_lists: - nb = len(members_list) - rt = rev.rename_record_type(members_list, offset) - log.debug('%d members for : %s', nb, rt.to_string()) - - - pass - - -class TestTypeReverser(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - cls._context = context.get_context_for_address(cls.memory_handler, 0x90000) - - @classmethod - def tearDownClass(cls): - cls.memory_handler = None - cls._context = None - - def test_doublelink(self): - rev = signature.TypeReverser(self.memory_handler) - # interesting records - # SIG:T4i4P4P4i4z12 - # struct_bbf78 struct_a6518 struct_cca28 - # list goes from 0xccd28, 0xccd00 to 0x98268 - #_record = self._context.get_record_for_address(0xccd28) - _record = self._context.get_record_for_address(0xccd00) - print(_record.to_string()) - _record.set_reverse_level(10) - rev.reverse_context(self._context) - print(_record.to_string()) - pass - - def test_otherlink(self): - # 0xa6f40, 0xa6f70 - _record = self._context.get_record_for_address(0xccd00) - print(_record.to_string()) - #import code - #code.interact(local=locals()) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.getLogger("reversers").setLevel(logging.DEBUG) - # logging.getLogger("signature").setLevel(logging.DEBUG) - # logging.getLogger("test_reversers").setLevel(logging.DEBUG) - # logging.getLogger("structure").setLevel(logging.DEBUG) - # logging.getLogger("dsa").setLevel(logging.DEBUG) - # logging.getLogger("winxpheap").setLevel(logging.DEBUG) - unittest.main(verbosity=2) diff --git a/test/haystack/reverse/test_api.py b/test/haystack/reverse/test_api.py deleted file mode 100644 index 58865d10..00000000 --- a/test/haystack/reverse/test_api.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import print_function -import logging -import unittest - -from haystack import dump_loader -from haystack.reverse import api -from test.testfiles import zeus_856_svchost_exe - -log = logging.getLogger("test_reverse_api") - - -class TestReverseApi(unittest.TestCase): - - def setUp(self): - dumpname = zeus_856_svchost_exe.dumpname - self.memory_handler = dump_loader.load(dumpname) - process_context = 
self.memory_handler.get_reverse_context() - - def tearDown(self): - self.memory_handler.reset_mappings() - self.memory_handler = None - - def test_pred(self): - addr = 0xc32628 - addr = 0xc32060 - process_context = self.memory_handler.get_reverse_context() - heap_context = process_context.get_context_for_address(addr) - # ordered allocation - allocs = heap_context.list_allocations_addresses() - self.assertEqual(allocs[0], 0xc30688) - _record = api.get_record_at_address(self.memory_handler, addr) - self.assertEqual(_record.address, addr) - #self.assertEqual(len(_record.get_fields()), 3) - print(_record.to_string()) - # FIXME - process must be reversed. Graph must be generated. - pred = api.get_record_predecessors(self.memory_handler, _record) - print('pred', pred) - for p in pred: - print(p.to_string()) - pass - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.getLogger("listmodel").setLevel(logging.DEBUG) - # logging.getLogger("reversers").setLevel(logging.DEBUG) - # logging.getLogger("signature").setLevel(logging.DEBUG) - # logging.getLogger("test_reversers").setLevel(logging.DEBUG) - # logging.getLogger("structure").setLevel(logging.DEBUG) - # logging.getLogger("dsa").setLevel(logging.DEBUG) - # logging.getLogger("winxpheap").setLevel(logging.DEBUG) - unittest.main(verbosity=2) diff --git a/test/haystack/reverse/test_cache.py b/test/haystack/reverse/test_cache.py deleted file mode 100644 index 7c9a590d..00000000 --- a/test/haystack/reverse/test_cache.py +++ /dev/null @@ -1,147 +0,0 @@ - -# -# -# this should test the memoryleaks... -# -# -# - -from __future__ import print_function - -def main(): - from haystack.reverse import context - ctx = context.get_context('test/dumps/skype/skype.1/skype.1.f') - from haystack.reverse import structure - it = structure.cache_load_all_lazy(ctx) - - structs = [] - for i in range(10000): - structs.append(it.next()) - - [s.to_string() for addr, s in structs] - - # 51 Mo - - structure.CacheWrapper.refs.size = 5 - for i in range(5): - structure.CacheWrapper.refs[i] = i - - # 51 Mo - - from meliae import scanner - scanner.dump_all_objects('filename.json') - - from meliae import loader - om = loader.load('filename.json') - s = om.summarize() - s - ''' - Total 206750 objects, 150 types, Total size = 27.2MiB (28495037 bytes) - Index Count % Size % Cum Max Kind - 0 75801 36 7529074 26 26 27683 str - 1 11507 5 6351864 22 48 552 Field - 2 16 0 5926913 20 69 2653328 numpy.ndarray - 3 10000 4 1680000 5 75 168 CacheWrapper - 4 2099 1 1158648 4 79 552 AnonymousStructInstance - 5 1182 0 857136 3 82 98440 dict - 6 18630 9 745200 2 85 40 weakref - 7 14136 6 633148 2 87 43812 list - ''' - # clearly Field instances keep some place.... 
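meliae is a Python 2-era profiler; on Python 3 the stdlib tracemalloc answers the same "where did the memory go" question, though grouped by allocation site rather than by type. A rough equivalent of the dump-and-summarize step above:

import tracemalloc

tracemalloc.start()
records = [bytearray(100) for _ in range(10000)]   # stand-in for cached records
snapshot = tracemalloc.take_snapshot()
for stat in snapshot.statistics('lineno')[:5]:
    # e.g. "prof.py:4: size=1221 KiB, count=10001, average=125 B"
    print(stat)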
- # most 10000 Anonymous intances are not int memory now - - om.compute_referrers() - - # om[ addr].parents - # om[ addr].children - - # get the biggest Field - f_addr = s.summaries[1].max_address - om[f_addr] - - # Field(179830860 552B 21refs 1par) - - om[f_addr].parents - # [179834316] - # >>> om[ 179834316 ] - # list(179834316 132B 19refs 1par) <- list of fields in Struct - - l_addr = om[f_addr].parents[0] - om[l_addr].parents - # [179849516] - # >>> om[ 179849516 ] - # AnonymousStructInstance(179849516 552B 23refs 19par) - - anon_addr = om[l_addr].parents[0] - om[anon_addr] - # 179849516 is a anon struct - import networkx - import matplotlib.pyplot as plt - - graphme() - - -def n(o): - return str(o).split(' ')[0] - - -def stop(o): - s = n(om[o]) - if s.startswith('classobj') or s.startswith('func'): - return True - if s.startswith('module') or s.startswith('local'): - return True - return False - - -def rec_add_child(graph, knowns, addr, t=''): - for c in om[addr].children: - if stop(c): - return - graph.add_edge(n(om[addr]), n(om[c])) - childscount = len(om[c].children) - print('c:', c, 'has', childscount, 'children') - if childscount > 0: - print(om[c]) - # add rec - if c in knowns: - return - knowns.add(c) - rec_add_child(graph, knowns, c, t + '\t') - rec_add_parent(graph, knowns, c, t + '\t') - - -def rec_add_parent(graph, knowns, addr, t=''): - for p in om[addr].parents: - if stop(p): - return - graph.add_edge(n(om[p]), n(om[addr])) - childscount = len(om[p].parents) - print('p:', p, 'has', childscount, 'parents') - if childscount > 0: - print(om[p]) - # add rec - if p in knowns: - return - knowns.add(p) - rec_add_parent(graph, knowns, p, t + '\t') - rec_add_child(graph, knowns, p, t + '\t') - - -def graphme(): - mygraph = networkx.DiGraph() - addr = anon_addr - known = set() - known.add(addr) - - rec_add_child(mygraph, known, addr) - - known = set() - known.add(addr) - rec_add_parent(mygraph, known, addr) - - #pos = networkx.spring_layout(mygraph) - # networkx.draw(mygraph,pos) - - # plt.show() - networkx.readwrite.gexf.write_gexf(mygraph, 'test.gexf') diff --git a/test/haystack/reverse/test_context.py b/test/haystack/reverse/test_context.py deleted file mode 100644 index 126b2eeb..00000000 --- a/test/haystack/reverse/test_context.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Tests haystack.utils .""" - -import logging -import unittest - -from haystack import dump_loader -from haystack.reverse import context -from haystack.reverse import structure -from haystack.reverse import fieldtypes -from haystack.reverse import config - -from test.haystack import SrcTests - - -log = logging.getLogger('test_memory_mapping') - - -class TestMappingsLinux(SrcTests): - - @classmethod - def setUpClass(cls): - cls.memory_handler = dump_loader.load('test/dumps/ssh/ssh.1') - - @classmethod - def tearDownClass(cls): - cls.memory_handler.reset_mappings() - cls.memory_handler = None - - def test_get_context(self): - # FIXME, move to reverser - # print ''.join(['%s\n'%(m) for m in _memory_handler]) - with self.assertRaises(ValueError): - context.get_context_for_address(self.memory_handler, 0x0) - with self.assertRaises(ValueError): - context.get_context_for_address(self.memory_handler, 0xb76e12d3) - # [heap] - heap_address = context.get_context_for_address(self.memory_handler, 0xb84e02d3)._heap_start - self.assertEqual(heap_address, self.memory_handler.get_mapping_for_address(0xb84e02d3).start) - - -class TestMappingsWindows(SrcTests): - - @classmethod - def 
setUpClass(cls): - cls.memory_handler = dump_loader.load('test/dumps/putty/putty.1.dump') - cls.my_target = cls.memory_handler.get_target_platform() - cls.my_ctypes = cls.my_target.get_target_ctypes() - cls.my_utils = cls.my_target.get_target_ctypes_utils() - - @classmethod - def tearDownClass(cls): - cls.memory_handler.reset_mappings() - cls.memory_handler = None - cls.my_target = None - cls.my_ctypes = None - cls.my_utils = None - - def test_get_context(self): - """ - - :return: - """ - # print ''.join(['%s\n'%(m) for m in _memory_handler]) - with self.assertRaises(ValueError): - context.get_context_for_address(self.memory_handler, 0x0) - with self.assertRaises(ValueError): - context.get_context_for_address(self.memory_handler, 0xb76e12d3) - #[heap] children - heap_address = context.get_context_for_address(self.memory_handler, 0x0062d000)._heap_start - self.assertEqual(heap_address, self.memory_handler.get_mapping_for_address(0x005c0000).start) - heap_address = context.get_context_for_address(self.memory_handler, 0x0063e123)._heap_start - self.assertEqual(heap_address,self.memory_handler.get_mapping_for_address(0x005c0000).start) - - -class TestProcessContext(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.dumpname = 'test/src/test-ctypes6.32.dump' - config.remove_cache_folder(cls.dumpname) - cls.memory_handler = dump_loader.load(cls.dumpname) - cls.my_target = cls.memory_handler.get_target_platform() - cls.my_ctypes = cls.my_target.get_target_ctypes() - cls.my_utils = cls.my_target.get_target_ctypes_utils() - - @classmethod - def tearDownClass(cls): - cls.memory_handler.reset_mappings() - cls.memory_handler = None - cls.my_target = None - cls.my_ctypes = None - cls.my_utils = None - config.remove_cache_folder(cls.dumpname) - - def test_save_record_type(self): - process_context = self.memory_handler.get_reverse_context() - - _record = structure.AnonymousRecord(self.memory_handler, 0xdeadbeef, 40) - word_size = self.my_target.get_word_size() - - f1 = fieldtypes.Field('f1', 0*word_size, fieldtypes.ZEROES, word_size, False) - f2 = fieldtypes.Field('f2', 1*word_size, fieldtypes.ZEROES, word_size, False) - fields = [f1, f2] - _record_type = structure.RecordType('struct_test', 2*word_size, fields) - _record.set_record_type(_record_type) - # same fields - self.assertEqual(f1, _record.get_fields()[0]) - self.assertEqual(f1, _record.get_field('f1')) - # get_fields return a new list of fields - x = _record.get_fields() - self.assertEqual(x, _record.get_fields()) - x.pop(0) - self.assertNotEqual(x, _record.get_fields()) - - process_context.add_reversed_type(_record_type, [1,2,3]) - - r_types = list(process_context.list_reversed_types()) - self.assertEqual(r_types[0].name, 'struct_test') - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.basicConfig(level=logging.DEBUG) - #logging.getLogger("listmodel").setLevel(level=logging.DEBUG) - unittest.main(verbosity=0) \ No newline at end of file diff --git a/test/haystack/reverse/test_fieldtypes.py b/test/haystack/reverse/test_fieldtypes.py deleted file mode 100644 index 221bf05c..00000000 --- a/test/haystack/reverse/test_fieldtypes.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Tests for haystack.reverse.structure.""" - -from __future__ import print_function -import logging -import unittest - -from haystack.reverse import context -from haystack.reverse import config -from haystack.reverse.heuristics import dsa -from haystack.reverse import fieldtypes -from 
haystack.reverse import structure -from haystack import dump_loader - -from test.haystack import SrcTests - -log = logging.getLogger('test_fieldtypes') - - -class TestField(SrcTests): - - @classmethod - def setUpClass(cls): - #self.context3 = context.get_context('test/src/test-ctypes3.dump') - cls.dumpname = 'test/src/test-ctypes6.32.dump' - config.remove_cache_folder(cls.dumpname) - - cls.memory_handler = dump_loader.load(cls.dumpname) - cls._target = cls.memory_handler.get_target_platform() - finder = cls.memory_handler.get_heap_finder() - heap_walker = finder.list_heap_walkers()[0] - heap_addr = heap_walker.get_heap_address() - - cls._load_offsets_values(cls.memory_handler.get_name()) - - cls.context6 = context.get_context_for_address(cls.memory_handler, heap_addr) - cls.dsa = dsa.FieldReverser(cls.context6.memory_handler) - cls.st = cls.context6.listStructures()[0] - - @classmethod - def tearDownClass(cls): - config.remove_cache_folder(cls.dumpname) - - def setUp(self): - pass - - def tearDown(self): - pass - - def test_zeroes(self): - z1 = fieldtypes.ZeroField('one', 0, 1) - self.assertEqual(len(z1), 1) - self.assertIn('ctypes.c_ubyte*1 )', z1.to_string('\x00\x00\x00\x00')) - - z2 = fieldtypes.ZeroField('two', 0, 2) - self.assertEqual(len(z2), 2) - self.assertIn('ctypes.c_ubyte*2 )', z2.to_string('\x00\x00\x00\x00')) - - def test_gaps(self): - g1 = fieldtypes.Field('gap_0', 0, fieldtypes.UNKNOWN, 1, False) - self.assertEqual(len(g1), 1) - self.assertTrue(g1.is_gap()) - print(g1.to_string('\x00\x00\x00\x00')) - self.assertIn('ctypes.c_ubyte*1 )', g1.to_string('\x00\x00\x00\x00')) - - g2 = fieldtypes.Field('gap_0', 0, fieldtypes.UNKNOWN, 2, False) - self.assertEqual(len(g2), 2) - self.assertIn('ctypes.c_ubyte*2 )', g2.to_string('\x00\x00\x00\x00')) - - def test_is_types(self): - # def __init__(self, astruct, offset, typename, size, isPadding): - ptr = fieldtypes.PointerField('ptr_0', 8, 4) - self.assertFalse(ptr.is_string()) - self.assertTrue(ptr.is_pointer()) - self.assertFalse(ptr.is_zeroes()) - self.assertFalse(ptr.is_array()) - self.assertFalse(ptr.is_integer()) - - def test_equals(self): - start = self.offsets['start_list'][0] - _record = structure.AnonymousRecord(self.memory_handler, start, 40) - word_size = self._target.get_word_size() - - f1 = fieldtypes.Field('f1', 0*word_size, fieldtypes.ZEROES, word_size, False) - f2 = fieldtypes.Field('f2', 1*word_size, fieldtypes.ZEROES, word_size, False) - fields = [f1, f2] - _record_type = structure.RecordType('struct_text', 2*word_size, fields) - _record.set_record_type(_record_type) - - self.assertEqual(f1, _record.get_fields()[0]) - self.assertEqual(f1, _record.get_field('f1')) - - def test_subtype(self): - start = self.offsets['start_list'][0] - _record = structure.AnonymousRecord(self.memory_handler, start, 40) - word_size = self._target.get_word_size() - - f1 = fieldtypes.Field('f1', 0*word_size, fieldtypes.ZEROES, word_size, False) - f4 = fieldtypes.Field('f2', 3*word_size, fieldtypes.ZEROES, word_size, False) - # offset in the substruct - fs2 = fieldtypes.PointerField('Back', 0, word_size) - fs2.value = start - fs3 = fieldtypes.PointerField('Next', 1*word_size, word_size) - fs3.value = start - # the new field sub record - new_field = fieldtypes.RecordField(_record, 1*word_size, 'list', 'LIST_ENTRY', [fs2, fs3]) - # fieldtypes.FieldType.makeStructField(_record, 1*word_size, 'LIST_ENTRY', [fs2, fs3], 'list') - # add them - fields = [f1, new_field, f4] - #_record.add_fields(fields) - _record_type = structure.RecordType('struct_text', 
40, fields) - _record.set_record_type(_record_type) - self.assertEqual(len(_record), 40) - f1, f2, f3 = _record.get_fields() - self.assertEqual(len(f1), word_size) - self.assertEqual(len(f2), word_size*2) - self.assertEqual(len(f3), word_size) - - self.assertEqual(f2.name, 'list') - self.assertIsInstance(f2.field_type, fieldtypes.FieldTypeStruct) - self.assertEqual(f2.field_type.name, 'LIST_ENTRY') - - print(_record.to_string()) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.getLogger("test_fieldtypes").setLevel(level=logging.DEBUG) - # logging.getLogger("reversers").setLevel(level=logging.DEBUG) - # logging.getLogger("structure").setLevel(level=logging.DEBUG) - # logging.getLogger("field").setLevel(level=logging.DEBUG) - # logging.getLogger("re_string").setLevel(level=logging.DEBUG) - unittest.main(verbosity=0) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/haystack/reverse/test_pattern.py b/test/haystack/reverse/test_pattern.py deleted file mode 100644 index 7b7e098e..00000000 --- a/test/haystack/reverse/test_pattern.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# - -from __future__ import print_function -import logging -import operator -import unittest -import struct - -import os - -from haystack import target -from haystack.reverse import pattern -from haystack.mappings.base import MemoryHandler, AMemoryMapping -from haystack.mappings.file import LocalMemoryMapping - -__author__ = "Loic Jaquemet loic.jaquemet+python@gmail.com" - -''' -Testing pointer patterns recognition. -''' - -log = logging.getLogger('test_pattern') - -class SignatureTests(unittest.TestCase): - """ - Helper class for signature tests - """ - - # a example pattern of interval between pointers - #seq = [4, 4, 8, 128, 4, 8, 4, 4, 12] - - @classmethod - def setUpClass(cls): - # make a fake dir - try: - os.mkdir('test/reverse/') - except OSError as e: - pass - try: - os.mkdir('test/reverse/fakedump') - except OSError as e: - pass - try: - os.mkdir('test/reverse/fakedump/cache') - except OSError as e: - pass - - def setUp(self): - # x64 - # FIXME, do base*word_size - self.seq = [8, 8, 16, 256, 8, 16, 8, 8, 24] - self.target = target.TargetPlatform.make_target_platform_local() - self.word_size = self.target.get_word_size() - - def _accumulate(self, iterable, func=operator.add): - """ - Translate an interval sequence to a absolute offset sequence - :param iterable: - :param func: - :return: - """ - it = iter(iterable) - total = next(it) - yield total - for element in it: - total = func(total, element) - yield total - - def _make_mmap(self, mstart, mlength, struct_offset, seq, word_size): - """ - Create memory mapping with some pointer values at specific - intervals. - :param mstart: - :param mlength: - :param struct_offset: - :param seq: - :param word_size: - :return: - """ - nsig = [struct_offset] - nsig.extend(seq) - # rewrite intervals indices to offsets from start - indices = [i for i in self._accumulate(nsig)] - dump = [] # b'' - values = [] - fmt = self.target.get_word_type_char() - # write a memory map with valid pointer address in specifics offsets. 
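# A worked line for the loop below, assuming get_word_type_char() returns
# 'Q' (an 8-byte word on a 64-bit target; the offset value is illustrative):
#
#   import struct
#   struct.pack('Q', 0x0c00000 + 96)        # a self-referencing pointer word
#   struct.pack('Q', 0x2e2e2e2e2e2e2e2e)    # the '.'-byte filler for non-pointer slots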
- for i in range(0, mlength, word_size): - if i in indices: - log.debug('Insert word %x at 0x%x',mstart + i,mstart + i) - dump.append(struct.pack(fmt, mstart + i)) - values.append(mstart + i) - else: - dump.append(struct.pack(fmt, 0x2e2e2e2e2e2e2e2e)) - - if len(dump) != mlength // word_size: - raise ValueError('error 1 on length dump %d ' % (len(dump))) - dump2 = b''.join(dump) - if len(dump) * word_size != len(dump2): - print(dump) - raise ValueError( - 'error 2 on length dump %d dump2 %d' % - (len(dump), len(dump2))) - stop = mstart + len(dump2) - mmap = AMemoryMapping(mstart, stop, '-rwx', 0, 0, 0, 0, 'test_mmap') - mmap.set_ctypes(self.target.get_target_ctypes()) - mmap2 = LocalMemoryMapping.fromBytebuffer(mmap, dump2) - # mmap2.set_ctypes(self.target.get_target_ctypes()) - return mmap2, values - - def _make_signature(self, intervals, struct_offset=None): - """ - Make a memory map, with a fake structure of pointer pattern inside. - Return the pattern signature - :param intervals: - :param struct_offset: - :return: - """ - # template of a memory map metadata - self._mstart = 0x0c00000 - self._mlength = 4096 # end at (0x0c01000) - # could be 8, it doesn't really matter - self.word_size = self.target.get_word_size() - if struct_offset is not None: - self._struct_offset = struct_offset - else: - self._struct_offset = self.word_size*12 # 12, or any other aligned - mmap, values = self._make_mmap(self._mstart, self._mlength, self._struct_offset, - intervals, self.word_size) - mappings = MemoryHandler([mmap], self.target, 'test/reverse/fakedump') - sig = pattern.PointerIntervalSignature(mappings, 'test_mmap') - return sig - - -class TestSignature(SignatureTests): - - def setUp(self): - super(TestSignature, self).setUp() - # Do not force ctypes to another platform, its useless - #self._target_platform = _target_platform.make_target_linux_32() - #self.seq = [4, 4, 8, 128, 4, 8, 4, 4, 12] - self.name = 'test_dump_1' - self.sig = self._make_signature(self.seq) - - def test_init(self): - # forget about the start of the mmap ( 0 to first pointer value) , its - # irrelevant - self.assertEqual(list(self.sig.sig[1:]), self.seq) - - def test_getAddressForPreviousPointer(self): - self.assertEqual( - self.sig.getAddressForPreviousPointer(0), - self._mstart) - self.assertEqual( - self.sig.getAddressForPreviousPointer(1), - self._mstart + - self._struct_offset) - self.assertEqual( - self.sig.getAddressForPreviousPointer(2), - self._mstart + - self._struct_offset + - self.word_size) - - def test_len(self): - self.assertEqual(len(self.sig), len(self.seq) + 1) - -# def tearDown(self): -# os.remove('test_dump_1.pinned') -# os.remove('test_dump_1.pinned.vaddr') -# os.remove('test_signature_1.pinned') -# os.remove('test_signature_1.pinned.vaddr') - - -class TestPinnedPointers(SignatureTests): - - def setUp(self): - super(TestPinnedPointers, self).setUp() - # PP.P...[..].PP.PPP..P - # forcing it on these unittest - #self._target_platform = _target_platform.make_target_linux_32() - #self.seq = [4, 4, 8, 128, 4, 8, 4, 4, 12] - self.offset = 1 # offset of the pinned pointer sequence in the sig - self.name = 'test_dump_1' - self.sig = self._make_signature(self.seq) - self.pp = pattern.PinnedPointers(self.seq, self.sig, self.offset, self.word_size) - - def test_init(self): - self.assertEqual( - self.pp.sequence, list(self.sig.sig[self.offset: self.offset + len(self.pp)])) - - def test_pinned(self): - self.assertEqual(self.pp.pinned(), self.seq) - self.assertEqual(len(self.pp.pinned(5)), 5) - 
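# Reminder of the encoding asserted in these tests: the signature stores
# intervals between consecutive pointers, and absolute offsets come back via
# a running sum. itertools.accumulate is the Python 3 stdlib equivalent of
# the _accumulate helper above (values illustrative):
#
#   import itertools
#   list(itertools.accumulate([96, 8, 8, 16]))   # -> [96, 104, 112, 128]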
self.assertEqual(self.pp.pinned(3), self.seq[0:3]) - - def test_len(self): - len_seq = len(self.seq) - self.assertEqual(len(self.pp), len_seq) - - def test_structlen(self): - len_struct = sum(self.seq) + self.word_size - self.assertEqual(self.pp.structLen(), len_struct) - - def test_cmp(self): - #seq = [4, 4, 8, 128, 4, 8, 4, 4, 12] - pp1 = pattern.PinnedPointers(self.seq[1:], self.sig, self.offset + 1, self.word_size) - pp2 = pattern.PinnedPointers(self.seq[1:-1], self.sig, self.offset + 1, self.word_size) - pp3 = pattern.PinnedPointers(self.seq[:-1], self.sig, self.offset + 1, self.word_size) - pp4 = pattern.PinnedPointers(self.seq[:], self.sig, self.offset + 1, self.word_size) - - #seq = [4, 8, 4, 128, 4, 8, 4, 4, 12] - seq = [8, 16, 8, 256, 8, 16, 8, 8, 24] - pp5 = pattern.PinnedPointers(seq, self.sig, self.offset, self.word_size) - - self.assertNotEqual(pp1, self.pp) - self.assertNotEqual(pp2, self.pp) - self.assertNotEqual(pp3, self.pp) - self.assertEqual(pp4, self.pp) - self.assertNotEqual(pp5, self.pp) - - # def test_contains(self): - # seq = [4,4,8,128,4,8,4,4,12] - # pp1 = pattern.PinnedPointers(seq[1:], self.sig, self.offset+1) - # pp2 = pattern.PinnedPointers(seq[1:-1], self.sig, self.offset+1) - # pp3 = pattern.PinnedPointers(seq[:-1], self.sig, self.offset+1) - # pp4 = pattern.PinnedPointers(seq[:], self.sig, self.offset+1)# - # seq = [4,8,4,128,4,8,4,4,12] - # pp5 = pattern.PinnedPointers(seq, self.sig, self.offset) - # - # #self.assertRaises( ValueError, r'ValueError', seq in self.pp ) - # self.assertIn( pp1 , self.pp ) - # self.assertIn( pp2 , self.pp ) - # self.assertIn( pp3 , self.pp ) - # self.assertIn( pp4 , self.pp ) - # self.assertIn( pp5 , self.pp ) - - def test_getAddress(self): - self.assertEqual( - self.pp.getAddress(), - self._mstart + - self._struct_offset) - self.assertEqual( - self.pp.getAddress(0), - self._mstart + - self._struct_offset) - self.assertEqual(self.pp.getAddress( - 1), self._mstart + self._struct_offset + sum(self.seq[:1])) - self.assertEqual(self.pp.getAddress( - 2), self._mstart + self._struct_offset + sum(self.seq[:2])) - - -class TestAnonymousStructRange(SignatureTests): - - def setUp(self): - super(TestAnonymousStructRange, self).setUp() - # .....PP.P...[..].PP.PPP..P - # forcing it on these unittest - #self._target_platform = _target_platform.make_target_linux_32() - #self.seq = [4, 4, 8, 128, 4, 8, 4, 4, 12] - self.offset = 1 # we need to skip the start -> first pointer part - self.name = 'struct_1' - self.sig = self._make_signature(self.seq) - self.pp = pattern.PinnedPointers(self.seq, self.sig, self.offset, self.word_size) - self.astruct = pattern.AnonymousStructRange(self.pp, self.word_size) - - def test_len(self): - len_struct = sum(self.seq) + self.word_size - self.assertEqual(len(self.astruct), len_struct) - self.assertEqual(len(self.astruct), self.pp.structLen()) - - def test_getPointersAddr(self): - ret = self.astruct.getPointersAddr() - tmp = [self._mstart, self._struct_offset] - tmp.extend(self.seq) - addresses = [i for i in self._accumulate(tmp)] - addresses.pop(0) # ignore address of start mmap - - self.assertEqual(len(ret), len(addresses)) - self.assertEqual(ret, addresses) - - def test_getPointersValues(self): - ret = self.astruct.getPointersValues() - addrs = self.astruct.getPointersAddr() - tmp = [self._mstart, self._struct_offset] - tmp.extend(self.seq) - addresses = [i for i in self._accumulate(tmp)] - addresses.pop(0) # ignore address of start mmap - - self.assertEqual(len(ret), len(addresses)) - 
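# The span arithmetic behind structLen(), as a worked example with
# word_size 8 and illustrative intervals:
#
#   seq = [8, 8, 16]             # gaps between four pinned pointers
#   assert sum(seq) + 8 == 40    # span plus the final pointer word itself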
self.assertEqual(len(ret), len(addrs)) - # pointer value is the pointer vaddr on first test case - for addr, val in zip(addrs, ret): - memval = self.sig.mmap.read_word(addr) - self.assertEqual(memval, val) - self.assertEqual(addr, val) - - def test_contains(self): - START = self._mstart + self._struct_offset - STOP = START + len(self.astruct) - - self.assertIn(START, self.astruct) - self.assertIn(START + 1, self.astruct) - self.assertIn(STOP, self.astruct) - self.assertIn(STOP - 1, self.astruct) - - self.assertNotIn(STOP + 1, self.astruct) - self.assertNotIn(START - 1, self.astruct) - - -class TestFunctions(unittest.TestCase): - - def test_findPattern_recursive_1(self): - sig = '''P4I4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z'''\ - '''4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u4z4P4I4u172z4I4T'''\ - '''8z4I4z4I4T8z4I4z4I4T8z4I4z4I4T8z4I4z4u4z26336''' - sig_res = 'P4 (I4){2} (u4z4P4I4){21} u172z4 (I4T8z4I4z4){4} u4z26336' - - self.assertEqual(pattern.findPatternText(sig, 2), sig_res) - - def test_findPattern_recursive_2(self): - sig = '''P4i4i4u9z8i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4T5I4P4i4T5I4P4i4I4I4P4i4T5'''\ - '''I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4'''\ - '''I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4I4I4P4i4T5I4P4I4i4''' - sig_res = 'P4 (i4){2} u9z8i4 (I4I4P4i4){7} (T5I4P4i4){2} (I4){2} P4i4T5I4P4i4 (I4I4P4i4){17} T5I4P4I4i4' - - self.assertEqual(pattern.findPatternText(sig, 2), sig_res) - - def test_findPattern_recursive_3(self): - sig = '''I4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4z12''' - sig_res = 'I4 (i4){31} z12' - # print pattern.findPatternText( sig,2) - # self.assertRaises ( ValueError, pattern.findPatternText , sig,2) # - # why ? 
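# For readers of these fixtures: each token pairs a field-type letter with a
# byte count (P pointer, I/i integer, z zeroes, u untyped, T apparently
# text), and findPatternText factors repeats into (...){n} groups. A rough
# single-character analogue of that factoring, not the real implementation:
#
#   import re
#   re.sub(r'(.+?)\1+',
#          lambda m: '(%s){%d}' % (m.group(1), len(m.group(0)) // len(m.group(1))),
#          'ababab')   # -> '(ab){3}'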
- self.assertEqual(pattern.findPatternText(sig, 2), sig_res) - - # def test_findPattern_recursive_3b(self): - # sig = '''I4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4z2''' - # sig_res = 'I4 (i4){31} z2' - # self.assertEquals ( pattern.findPatternText(sig,2), sig_res) - - def test_findPattern_recursive_4(self): - sig = '''123321444567444567444567444567111123''' - sig_res = '123321 (444567){4} 111123' - self.assertEqual(pattern.findPatternText(sig, 3), sig_res) - - def test_findPattern_recursive_5(self): - sig = '''AAABABABABBAAABBBBABACBCBCBCBABCBABABC''' - sig_res = ' (A){2} (AB){4} B (A){3} (B){3} (BA){2} (CB){3} (CBAB){2} ABC' - self.assertEqual(pattern.findPatternText(sig, 1), sig_res) - - def test_findPattern_recursive_6(self): - sig = '''aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy''' - sig_res = ' (a){5} (1){4} (b){3} (c){2} d (a2){5} (b1c){4} (abcd){4} p (o){2} (i){3} (u){4} (y){5} ' - self.assertEqual(pattern.findPatternText(sig, 1), sig_res) - - def test_findPattern_recursive_7(self): - sig = '''aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy''' - sig_res = ' (a){5} (1){4} (b){3} ccd (a2){5} (b1c){4} (abcd){4} poo (i){3} (u){4} (y){5} ' - self.assertEqual(pattern.findPatternText(sig, 1, 3), sig_res) - - # def test_findPattern_recursive_8(self): - # sig = '''I4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4z12''' - # sig_res = 'I4 (i4){31} z12' - # self.assertRaises ( ValueError, pattern.findPatternText , sig,2,4) - - def test_findPattern_recursive_8b(self): - sig = '''I4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4z1''' - sig_res = 'I4 (i4){31} z1' - self.assertEqual(pattern.findPatternText(sig, 2, 4), sig_res) - - def test_findPattern_recursive_9(self): - sig = '''aaaaa1111bbbccda2a2a2a2a2b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuuyyyyy''' - sig_res = ' (a){5} 1111bbbccd (a2){5} b1cb1cb1cb1cabcdabcdabcdabcdpooiiiuuuu (y){5} ' - self.assertEqual(pattern.findPatternText(sig, 1, 5), sig_res) - - -class TestPatternEncoder(unittest.TestCase): - - def test_makePattern_1(self): - sig = ['P4', - 'I4', - 'I4'] + (['u4', - 'z12', - 'P4', - 'I4'] * 21) + ['u172', - 'z12'] + (['I4', - 'T8', - 'z12', - 'I4', - 'z12'] * 4) + ['u4', - 'z26336'] - encoder = pattern.PatternEncoder(sig, 3) - #sig_res = 'P4 (I4){2} (u4z12P4I4){21} u172z12 (I4T8z12I4z12){4} u4z26336' - sig_res = [ - (1, 'P4'), (1, 'I4'), (1, 'I4'), (21, [ - 'u4', 'z12', 'P4', 'I4']), (1, 'u172'), (1, 'z12'), (4, [ - 'I4', 'T8', 'z12', 'I4', 'z12']), (1, 'u4'), (1, 'z26336')] - - self.assertEqual(encoder.makePattern(), sig_res) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - #logging.getLogger('haystack').setLevel(logging.INFO) - #logging.getLogger('pattern').setLevel(logging.DEBUG) - unittest.main(verbosity=2) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/haystack/reverse/test_pointerfinder.py b/test/haystack/reverse/test_pointerfinder.py deleted file mode 100644 index 033daa15..00000000 --- a/test/haystack/reverse/test_pointerfinder.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2011 Loic Jaquemet loic.jaquemet+python@gmail.com -# -from __future__ import print_function -import haystack.reverse.enumerators -import haystack.reverse.matchers - -import unittest - -from haystack.mappings.base import MemoryHandler -from haystack.mappings.base import AMemoryMapping -from 
haystack.mappings.file import LocalMemoryMapping - -from haystack import dump_loader -from haystack.reverse import searchers - -from . import test_pattern - -from test.testfiles import zeus_856_svchost_exe - -import timeit -import logging - -log = logging.getLogger('test_pointerfinder') - -class TestPointer(test_pattern.SignatureTests): - - def setUp(self): - super(TestPointer, self).setUp() - self.mmap, self.values = self._make_mmap_with_values(self.seq) - self.name = 'test_dump_1' - self.feedback = searchers.NoFeedback() - - def _make_mmap_with_values(self, intervals, struct_offset=None): - """ - Make a memory map, with a fake structure of pointer pattern inside. - Return the pattern signature - - :param intervals: - :param struct_offset: - :return: - """ - # template of a memory map metadata - self._mstart = 0x0c00000 - self._mlength = 4096 # end at (0x0c01000) - # could be 8, it doesn't really matter - self.word_size = self.target.get_word_size() - if struct_offset is not None: - self._struct_offset = struct_offset - else: - self._struct_offset = self.word_size*12 # 12, or any other aligned - mmap,values = self._make_mmap(0x0c00000, 4096, self._struct_offset, - intervals, self.word_size) - # add a reference to mmap in mmap2 - ammap2 = AMemoryMapping(0xff7dc000, 0xff7dc000+0x1000, '-rwx', 0, 0, 0, 0, 'test_mmap2') - ammap2.set_ctypes(self.target.get_target_ctypes()) - mmap2 = LocalMemoryMapping.fromBytebuffer(ammap2, mmap.get_byte_buffer()) - self._memory_handler = MemoryHandler([mmap, mmap2], self.target, 'test') - self.mmap2 = mmap2 - return mmap, values - - -class TestPointerSearcher(TestPointer): - - def test_iter(self): - matcher = haystack.reverse.matchers.PointerSearcher(self._memory_handler) - self.pointerSearcher = searchers.WordAlignedSearcher(self.mmap, matcher, self.feedback, self.word_size) - iters = [value for value in self.pointerSearcher] - values = self.pointerSearcher.search() - self.assertEqual(iters, values) - self.assertEqual(self.values, values) - self.assertEqual(self.values, iters) - - -class TestPointerEnumerator(TestPointer): - - def test_iter(self): - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - self.pointerEnum = haystack.reverse.enumerators.WordAlignedEnumerator(self.mmap, matcher, self.feedback, self.word_size) - values = [value for offset, value in self.pointerEnum] - offsets = [offset for offset, value in self.pointerEnum] - values_2 = [value for offset, value in self.pointerEnum.search()] - offsets_2 = [offset for offset, value in self.pointerEnum.search()] - - self.assertEqual(values, values_2) - self.assertEqual(offsets, offsets_2) - self.assertEqual(self.values, values) - self.assertEqual(self.values, values_2) - - nsig = [self._mstart + self._struct_offset] - nsig.extend(self.seq) - indices = [i for i in self._accumulate(nsig)] - self.assertEqual(indices, offsets) - self.assertEqual(indices, offsets_2) - - def test_iter_advanced(self): - """test that pointers to other mappings are detected""" - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - self.pointerEnum1 = haystack.reverse.enumerators.WordAlignedEnumerator(self.mmap, matcher, self.feedback, self.word_size) - offsets1, values1 = zip(*self.pointerEnum1.search()) - self.pointerEnum2 = haystack.reverse.enumerators.WordAlignedEnumerator(self.mmap2, matcher, self.feedback, self.word_size) - offsets2, values2 = zip(*self.pointerEnum2.search()) - - self.assertEqual(values1, values2) - self.assertEqual(len(values1), len(self.seq)+1) - -class 
TestPointerEnumeratorReal(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls._memory_handler = dump_loader.load(zeus_856_svchost_exe.dumpname) - #cls._memory_handler = dump_loader.load(putty_1_win7.dumpname) - cls._utils = cls._memory_handler.get_target_platform().get_target_ctypes_utils() - return - - @classmethod - def tearDownClass(cls): - cls._utils = None - cls._memory_handler.reset_mappings() - cls._memory_handler = None - return - - def setUp(self): - self._heap_finder = self._memory_handler.get_heap_finder() - return - - def tearDown(self): - self._heap_finder = None - return - - def _stats(self, heap_addrs): - # get the weight per mapping - mapdict = {} - for m in self._memory_handler.get_mappings(): - mapdict[m.start] = 0 - for addr in heap_addrs: - m = self._memory_handler.get_mapping_for_address(addr) - mapdict[m.start] += 1 - - res = [(v,k) for k,v, in mapdict.items()] - res.sort() - res.reverse() - print('Most used mappings:') - for cnt,s in res: - if cnt == 0: - continue - m = self._memory_handler.get_mapping_for_address(s) - print(cnt, m) - - def test_pointer_enumerators(self): - """ - Search pointers values in one HEAP - :return: - """ - # prep the workers - dumpfilename = self._memory_handler.get_name() - word_size = self._memory_handler.get_target_platform().get_word_size() - feedback = searchers.NoFeedback() - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - finder = self._memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - walker = walkers[0] - heap_addr = walker.get_heap_address() - heap = walker.get_heap_mapping() - # create the enumerator on the whole mapping - enumerator1 = haystack.reverse.enumerators.WordAlignedEnumerator(heap, matcher, feedback, word_size) - # collect the pointers - if False: - ### - ts1 = timeit.timeit(enumerator1.search, number=3) - import cProfile, pstats, StringIO - pr = cProfile.Profile() - pr.enable() - # ... do something ... - heap_enum = enumerator1.search() - pr.disable() - s = StringIO.StringIO() - sortby = 'cumulative' - ps = pstats.Stats(pr, stream=s).sort_stats(sortby) - ps.print_stats() - print(s.getvalue()) - ### - else: - heap_enum = enumerator1.search() - ts1 = 0.0 - heap_addrs1, heap_values1 = zip(*heap_enum) - print('WordAlignedEnumerator: %d pointers, timeit %0.2f' % (len(heap_addrs1), ts1)) - - self._stats(heap_addrs1) - - def test_pointer_enumerators_allocated(self): - """ - Search pointers values in allocated chunks from one HEAP - :return: - """ - # prep the workers - word_size = self._memory_handler.get_target_platform().get_word_size() - feedback = searchers.NoFeedback() - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - finder = self._memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - heap_walker = walkers[0] - # create the enumerator on the allocated chunks mapping - enumerator2 = haystack.reverse.enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, word_size) - # collect the pointers - if False: - ### - ts2 = timeit.timeit(enumerator2.search, number=3) - import cProfile, pstats, StringIO - pr = cProfile.Profile() - pr.enable() - # ... do something ... 
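# Same dormant profiling recipe as in the previous test; on Python 3 the
# string buffer would come from io rather than the StringIO module. A sketch
# of the standard cProfile idiom used here:
#
#   import cProfile, io, pstats
#   pr = cProfile.Profile()
#   pr.enable()
#   heap_enum2 = enumerator2.search()    # the workload being measured
#   pr.disable()
#   out = io.StringIO()
#   pstats.Stats(pr, stream=out).sort_stats('cumulative').print_stats()
#   print(out.getvalue())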
- heap_enum2 = enumerator2.search() - pr.disable() - s = StringIO.StringIO() - sortby = 'cumulative' - ps = pstats.Stats(pr, stream=s).sort_stats(sortby) - ps.print_stats() - print(s.getvalue()) - ### - else: - heap_enum2 = enumerator2.search() - ts2 = 0.0 - heap_addrs2, heap_values2 = zip(*heap_enum2) - logging.debug('AllocatedWordAlignedEnumerator: %d pointers, timeit %0.2f', len(heap_addrs2), ts2) - - self._stats(heap_addrs2) - - def test_pointer_enumerators_all(self): - """ - Search pointers values in all HEAP - :return: - """ - # prep the workers - word_size = self._memory_handler.get_target_platform().get_word_size() - feedback = searchers.NoFeedback() - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - finder = self._memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - all_heaps_addrs = [] - for walker in walkers: - #if heap.start != 0x03360000: - # continue - heap = walker.get_heap_mapping() - log.debug('heap is %s', heap) - # create the enumerator on the allocated chunks mapping - enumerator2 = haystack.reverse.enumerators.WordAlignedEnumerator(heap, matcher, feedback, word_size) - # collect the pointers - heap_enum2 = enumerator2.search() - ts2 = 0.0 - if len(heap_enum2) == 0: - logging.debug('Heap %s has no pointers in allocated blocks', heap) - else: - heap_addrs2, heap_values2 = zip(*heap_enum2) - logging.debug('WordAlignedEnumerator: %d pointers, timeit %0.2f', len(heap_addrs2), ts2) - all_heaps_addrs.extend(heap_addrs2) - ## - if False: - print("Pointers:") - for k,v in heap_enum2: - print(hex(k), hex(v)) - - self._stats(all_heaps_addrs) - - def test_pointer_enumerators_allocated_all(self): - """ - Search pointers values in allocated chunks from all HEAP - :return: - """ - # prep the workers - word_size = self._memory_handler.get_target_platform().get_word_size() - feedback = searchers.NoFeedback() - matcher = haystack.reverse.matchers.PointerEnumerator(self._memory_handler) - finder = self._memory_handler.get_heap_finder() - walkers = finder.list_heap_walkers() - all_heaps_addrs = [] - for heap_walker in walkers: - #if heap.start != 0x03360000: - # continue - heap = heap_walker.get_heap_mapping() - log.debug('heap is %s', heap) - # create the enumerator on the allocated chunks mapping - enumerator2 = haystack.reverse.enumerators.AllocatedWordAlignedEnumerator(heap_walker, matcher, feedback, word_size) - # collect the pointers - heap_enum2 = enumerator2.search() - ts2 = 0.0 - if len(heap_enum2) == 0: - logging.debug('Heap %s has no pointers in allocated blocks', heap) - else: - heap_addrs2, heap_values2 = zip(*heap_enum2) - logging.debug('AllocatedWordAlignedEnumerator: %d pointers, timeit %0.2f', len(heap_addrs2), ts2) - all_heaps_addrs.extend(heap_addrs2) - ## - if False: - print("Pointers:") - for k,v in heap_enum2: - print(hex(k), hex(v)) - print("Allocations:") - for addr, size in heap_walker.get_user_allocations(): - print(hex(addr), '->', hex(addr+size), '(%x)'%size) - print("Free chunks:") - for addr, size in heap_walker.get_free_chunks(): - print(hex(addr), '->', hex(addr+size), '(%x)'%size) - - self._stats(all_heaps_addrs) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.getLogger("test_pointerfinder").setLevel(logging.DEBUG) - unittest.main() diff --git a/test/haystack/reverse/test_re_string.py b/test/haystack/reverse/test_re_string.py deleted file mode 100644 index f0a56188..00000000 --- a/test/haystack/reverse/test_re_string.py +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env 
python -# -*- coding: utf-8 -*- - -"""Tests for haystack.reverse.structure.""" - -from __future__ import print_function -import logging -import unittest - -from haystack.reverse import re_string - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__status__ = "Production" - - -class TestReString(unittest.TestCase): - - @classmethod - def setUpClass(cls): - # context.get_context('test/src/test-ctypes3.dump') - cls.context = None - cls.test1 = b'''C\x00:\x00\\\x00U\x00s\x00e\x00r\x00s\x00\\\x00j\x00a\x00l\x00\\\x00A\x00p\x00p\x00D\x00a\x00t\x00a\x00\\\x00R\x00o\x00a\x00m\x00i\x00n\x00g\x00\\\x00M\x00i\x00c\x00r\x00o\x00s\x00o\x00f\x00t\x00\\\x00I\x00n\x00t\x00e\x00r\x00n\x00e\x00t\x00 \x00E\x00x\x00p\x00l\x00o\x00r\x00e\x00r\x00\\\x00Q\x00u\x00i\x00c\x00k\x00 \x00L\x00a\x00u\x00n\x00c\x00h\x00\\\x00d\x00e\x00s\x00k\x00t\x00o\x00p\x00.\x00i\x00n\x00i\x00\x00\x00''' - cls.test2 = b'''\x4C\x00\x6F\x00\xEF\x00\x63\x00\x20\x00\x4A\x00\x61\x00\x71\x00\x75\x00\x65\x00\x6D\x00\x65\x00\x74\x00\x00\x00''' - cls.test3 = b'''\\\x00R\x00E\x00G\x00I\x00S\x00T\x00R\x00Y\x00\\\x00U\x00S\x00E\x00R\x00\\\x00S\x00-\x001\x00-\x005\x00-\x002\x001\x00-\x002\x008\x008\x004\x000\x006\x003\x000\x007\x003\x00-\x003\x003\x002\x009\x001\x001\x007\x003\x002\x000\x00-\x003\x008\x001\x008\x000\x003\x009\x001\x009\x009\x00-\x001\x000\x000\x000\x00_\x00C\x00L\x00A\x00S\x00S\x00E\x00S\x00\\\x00W\x00o\x00w\x006\x004\x003\x002\x00N\x00o\x00d\x00e\x00\\\x00C\x00L\x00S\x00I\x00D\x00\\\x00{\x007\x006\x007\x006\x005\x00B\x001\x001\x00-\x003\x00F\x009\x005\x00-\x004\x00A\x00F\x002\x00-\x00A\x00C\x009\x00D\x00-\x00E\x00A\x005\x005\x00D\x008\x009\x009\x004\x00F\x001\x00A\x00}\x00''' - cls.test4 = b'''edrtfguyiopserdtyuhijo45567890oguiy4e65rtiu\x07\x08\x09\x00''' - cls.test5 = b'''edrt\x00fguyiopserdtyuhijo45567890oguiy4e65rtiu\xf1\x07\x08\x09\x00\x00''' - cls.test6 = b'''\xf3drtfguyiopserdtyuhijo45567890oguiy4e65rtiu\xf1\x07\x08\x09\x00''' - cls.test7 = b'\x1e\x1c\x8c\xd8\xcc\x01\x00' # pure crap - cls.test8 = b'C\x00:\x00\\\x00W\x00i\x00n\x00d\x00o\x00w\x00s\x00\\\x00S\x00y\x00s\x00t\x00e\x00m\x003\x002\x00\\\x00D\x00r\x00i\x00v\x00e\x00r\x00S\x00t\x00o\x00r\x00e\x00\x00\x00\xf1/\xa6\x08\x00\x00\x00\x88,\x00\x00\x00C\x00:\x00\\\x00P\x00r\x00o\x00g\x00r\x00a\x00m\x00 \x00F\x00i\x00l\x00e\x00s\x00 \x00(\x00x\x008\x006\x00)\x00\x00\x00P\x00u\x00T\x00Y\x00' - cls.test9 = b'\x01\x01@\x00C\x00:\x00\\\x00W\x00i\x00n\x00d\x00o\x00w\x00s\x00' - cls.test10 = b'''\x4C\x6F\xEF\x63\x20\x4A\x61\x71\x75\x65\x6D\x65\x74''' - cls.test11 = b'D\x00c\x00o\x00m\x00L\x00a\x00u\x00n\x00c\x00h\x00\x00\x00T\x00e\x00r\x00m\x00S\x00e\x00r\x00v\x00i\x00c\x00e\x00\x00\x00\x00\x00' - - def setUp(self): - pass - - def tearDown(self): - pass - - def test_startsWithNulTerminatedString(self): - # self.skipTest('') - - size, codec, txt = re_string.startsWithNulTerminatedString(self.test1) - self.assertEqual(size, len(self.test1)) - - pass - - @unittest.expectedFailure - def test_try_decode_string(self): - # self.skipTest('') - - size, codec, txt = re_string.try_decode_string(self.test1) - self.assertEqual(size, len(self.test1)) - - size, codec, txt = re_string.try_decode_string(self.test2) - self.assertEqual(size, len(self.test2)) - - size, codec, txt = re_string.try_decode_string(self.test3) - self.assertEqual(size, len(self.test3)) - - size, codec, txt = re_string.try_decode_string(self.test4) - self.assertEqual(size, len(self.test4) 
- 4) - - size, codec, txt = re_string.try_decode_string(self.test5) - self.assertEqual(size, len(self.test5) - 5) - - ret = re_string.try_decode_string(self.test7) - self.assertFalse(ret) - - size, codec, txt = re_string.try_decode_string(self.test8) - self.assertEqual(size, len(self.test8)) - - pass - - def test_testEncoding(self): - # self.skipTest('') - - uni = self.test1 - size, encoded = re_string.testEncoding(uni, 'utf-16le') - self.assertEqual(size, len(uni)) - - x3 = self.test2 - size, encoded = re_string.testEncoding(x3, 'utf-16le') - self.assertEqual(size, len(x3)) - - size, encoded = re_string.testEncoding(self.test4, 'utf-16le') - self.assertEqual(size, -1) - - size, encoded = re_string.testEncoding(self.test4, 'utf-8') - self.assertEqual(size, len(self.test4)) - - pass - - def test_testAllEncodings(self): - - # self.skipTest('') - - uni = self.test1 - solutions = re_string.testAllEncodings(uni) - size, codec, encoded = solutions[0] - self.assertEqual(size, len(uni), '%s' % codec) - - x3 = self.test2 - solutions = re_string.testAllEncodings(x3) - size, codec, encoded = solutions[0] - self.assertEqual(size, len(x3)) - - solutions = re_string.testAllEncodings(self.test3) - size, codec, encoded = solutions[0] - self.assertEqual(size, len(self.test3)) - - solutions = re_string.testAllEncodings(self.test4) - size, codec, encoded = solutions[0] - self.assertEqual(size, len(self.test4)) - - pass - - def test_nocopy_class(self): - # self.skipTest('') - s = '1234567890' - x = re_string.Nocopy(s, 2, 9) - x1 = s[2:9] - self.assertEqual(len(x), len(x1)) - for i in range(len(x)): - self.assertEqual(x[i], x1[i]) - # - val = x[2:4] - self.assertEqual(val, '56') - self.assertEqual(val, x[2:4]) - self.assertEqual(s[4:-1], x[2:]) - self.assertEqual(s[2:-1], x[:16]) - self.assertEqual(s[2:-1], x[:]) - self.assertEqual(s[2:-1], x[0:]) - self.assertEqual(s[2:-1], x) - - self.assertEqual(re_string.Nocopy(s, 9, 10), s[9:10]) - self.assertEqual(re_string.Nocopy(s, 9, 10), '0') - self.assertEqual(re_string.Nocopy(s, -2, -1), '9') - - # self.assertRaises(re_string.Nocopy(s,9,11)) - - def test_rfind_utf16(self): - # print len(self.test1) - self.assertEqual(0, re_string.rfind_utf16(self.test1, 0, len(self.test1), True, 4)) - self.assertEqual(0, re_string.rfind_utf16(self.test2, 0, len(self.test2), True, 4)) - self.assertEqual(0, re_string.rfind_utf16(self.test3, 0, len(self.test3), True, 4)) - self.assertEqual(-1, re_string.rfind_utf16(self.test4, 0, len(self.test4), True, 4)) - self.assertEqual(-1, re_string.rfind_utf16(self.test5, 0, len(self.test5), True, 4)) - self.assertEqual(-1, re_string.rfind_utf16(self.test6, 0, len(self.test6), True, 4)) - self.assertEqual(-1, re_string.rfind_utf16(self.test7, 0, len(self.test7), True, 4)) - # truncated last field - # print repr(self.test8[120:]) - self.assertEqual(122, re_string.rfind_utf16(self.test8, 0, len(self.test8), False, 4)) - # find start with limited size - self.assertEqual(0, re_string.rfind_utf16(self.test8, 0, 64, True, 4)) - # middle field ( 12+64 ) - self.assertEqual(12, re_string.rfind_utf16(self.test8, 64, 58, True, 4)) - # non aligned middle field ? 
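# Intuition for the expected offsets: UTF-16LE ASCII text is a run of
# 'X\x00' byte pairs, and rfind_utf16 scans backwards for where such a run
# begins. A crude per-word check in the same spirit, not the real heuristic:
#
#   def looks_utf16le(pair):
#       return pair[1:2] == b'\x00' and 0x20 <= ord(pair[0:1]) < 0x7f
#   looks_utf16le(b'C\x00')   # True: the first word of test1's 'C:\\Users\\...'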
- # TODO self.assertEqual( 4, re_string.rfind_utf16(self.test9, 0, - # len(self.test9) )) - ## - # self.assertEqual(0, re_string.rfind_utf16(self.test11, 0, 48, False, 4)) - print(re_string.rfind_utf16(self.test11, 0, 44, False, 4)) - - def test_find_ascii(self): - self.assertEqual( - (-1, -1), re_string.find_ascii(self.test1, 0, len(self.test1))) - self.assertEqual( - (0, 43), re_string.find_ascii( - self.test4, 0, len( - self.test4))) - self.assertEqual( - (0, 4), re_string.find_ascii( - self.test5, 0, len( - self.test5))) - self.assertEqual( - (0, 39), re_string.find_ascii( - self.test5, 5, len( - self.test5) - 5)) - self.assertEqual( - (-1, -1), re_string.find_ascii(self.test6, 0, len(self.test6))) - self.assertEqual( - (0, 42), re_string.find_ascii( - self.test6, 1, len( - self.test6) - 1)) - self.assertEqual( - (-1, -1), re_string.find_ascii(self.test10, 0, len(self.test10))) # too small - self.assertEqual( - (0, 10), re_string.find_ascii( - self.test10, 3, len( - self.test10) - 3)) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # logging.getLogger("re_string").setLevel(level=logging.DEBUG) - unittest.main(verbosity=0) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/test/haystack/reverse/test_structure.py b/test/haystack/reverse/test_structure.py deleted file mode 100644 index 8ea377ea..00000000 --- a/test/haystack/reverse/test_structure.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Tests for haystack.reverse.structure.""" - -import logging -import unittest - -import os - -from haystack.reverse import context -from haystack.reverse import config -from haystack.reverse import structure -from haystack.reverse import fieldtypes -from haystack.reverse.heuristics import dsa -from haystack.reverse.heuristics import pointertypes -from haystack import dump_loader - -__author__ = "Loic Jaquemet" -__copyright__ = "Copyright (C) 2012 Loic Jaquemet" -__license__ = "GPL" -__maintainer__ = "Loic Jaquemet" -__email__ = "loic.jaquemet+python@gmail.com" -__status__ = "Production" - -log = logging.getLogger("test_structure") - - -class TestStructure(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.dumpname = 'test/src/test-ctypes3.32.dump' - config.remove_cache_folder(cls.dumpname) - cls.memory_handler = dump_loader.load(cls.dumpname) - finder = cls.memory_handler.get_heap_finder() - heap_walker = finder.list_heap_walkers()[0] - heap_addr = heap_walker.get_heap_address() - cls.context = context.get_context_for_address(cls.memory_handler, heap_addr) - cls.target = cls.context.memory_handler.get_target_platform() - cls.dsa = dsa.FieldReverser(cls.context.memory_handler) - cls.pta = pointertypes.PointerFieldReverser(cls.context.memory_handler) - return - - @classmethod - def tearDownClass(cls): - config.remove_cache_folder(cls.dumpname) - cls.context = None - cls.target = None - cls.dsa = None - cls.pta = None - return - - def setUp(self): - return - - def tearDown(self): - return - - def test_decodeFields(self): - for s in self.context.listStructures(): - self.dsa.reverse_record(self.context, s) - pointer_fields = [f for f in s.get_fields() if f.is_pointer()] - if len(s) == 12: # Node + padding, 1 pointer on create - self.assertEqual(len(s.get_fields()), 3) # 1, 2 and padding - self.assertEqual(len(pointer_fields), 2) - elif len(s) == 20: # test3, 1 pointer on create - # fields, no heuristic to detect medium sized int - # TODO 
untyped of size < 8 == int * x - # print s.toString() - self.assertEqual(len(s.get_fields()), 3) # discutable - self.assertEqual(len(pointer_fields), 1) - return - - def test_resolvePointers(self): - for s in self.context.listStructures(): - self.dsa.reverse_record(self.context, s) - for s in self.context.listStructures(): - self.pta.reverse_record(self.context, s) - self.assertTrue(True) # test no error - - def test_resolvePointers2(self): - for s in self.context.listStructures(): - self.dsa.reverse_record(self.context, s) - self.assertEqual(s.get_reverse_level(), 10) - for s in self.context.listStructures(): - log.debug('RLEVEL: %d' % s.get_reverse_level()) - self.pta.reverse_record(self.context, s) - pointer_fields = [f for f in s.get_fields() if f.is_pointer()] - if len(s) == 12: # Node + padding, 1 pointer on create - self.assertEqual(len(s.get_fields()), 3) # 1, 2 and padding - self.assertEqual(len(pointer_fields), 2) - - def test_reset(self): - for s in self.context.listStructures(): - s.reset() - if isinstance(s, structure.CacheWrapper): - members = s.obj().__dict__ - else: - members = s.__dict__ - for name, value in members.items(): - if name in ['_size', '_memory_handler', '_name', '_vaddr', '_target']: - self.assertNotIn(value, [None, False]) - elif name in ['_dirty', '_AnonymousRecord__address', '_AnonymousRecord__record_type']: - self.assertTrue(value) - elif name in ['_fields']: - self.assertEqual(value, list()) - elif name in ['dumpname']: - self.assertTrue(os.access(value, os.F_OK)) - else: - self.assertIn(value, [None, False], name + ' not resetted') - - -class TestStructure2(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.dumpname = 'test/src/test-ctypes6.32.dump' - config.remove_cache_folder(cls.dumpname) - cls.memory_handler = dump_loader.load(cls.dumpname) - finder = cls.memory_handler.get_heap_finder() - heap_walker = finder.list_heap_walkers()[0] - heap_addr = heap_walker.get_heap_address() - cls.context = context.get_context_for_address(cls.memory_handler, heap_addr) - cls.target = cls.context.memory_handler.get_target_platform() - cls.dsa = dsa.FieldReverser(cls.context.memory_handler) - cls.pta = pointertypes.PointerFieldReverser(cls.context.memory_handler) - return - - @classmethod - def tearDownClass(cls): - config.remove_cache_folder(cls.dumpname) - cls.context = None - cls.target = None - cls.dsa = None - cls.pta = None - return - - def setUp(self): - return - - def tearDown(self): - return - - def test_string_overlap(self): - for s in self.context.listStructures(): - # s.resolvePointers() - self.dsa.reverse_record(self.context, s) - log.debug(s.to_string()) - self.assertTrue(True) # test no error - - def test_get_fields(self): - _record = structure.AnonymousRecord(self.memory_handler, 0xdeadbeef, 40) - word_size = self.target.get_word_size() - - f1 = fieldtypes.Field('f1', 0*word_size, fieldtypes.ZEROES, word_size, False) - f2 = fieldtypes.Field('f2', 1*word_size, fieldtypes.ZEROES, word_size, False) - fields = [f1, f2] - _record_type = structure.RecordType('struct_test', 2*word_size, fields) - _record.set_record_type(_record_type) - # same fields - self.assertEqual(f1, _record.get_fields()[0]) - self.assertEqual(f1, _record.get_field('f1')) - # get_fields return a new list of fields - x = _record.get_fields() - self.assertEqual(x, _record.get_fields()) - x.pop(0) - self.assertNotEqual(x, _record.get_fields()) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - # 
logging.getLogger("test_structure").setLevel(logging.DEBUG) - #suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctions) - # unittest.TextTestRunner(verbosity=2).run(suite) - unittest.main(verbosity=2) diff --git a/test/haystack/reverse/test_utils.py b/test/haystack/reverse/test_utils.py deleted file mode 100644 index 0fe8953c..00000000 --- a/test/haystack/reverse/test_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""Tests haystack.utils .""" - -import unittest - -import numpy - -from haystack.reverse import utils -from haystack.reverse import context -from haystack import dump_loader - - -class TestBasicFunctions(unittest.TestCase): - - def setUp(self): - pass - - def test_closestFloorValue(self): - lst = numpy.asarray(range(0, 100, 10)) - self.assertEqual(utils.closestFloorValue(41, lst), (40, 4)) - self.assertEqual(utils.closestFloorValue(40, lst), (40, 4)) - with self.assertRaises(ValueError): - utils.closestFloorValue(-1, lst) - - memory_handler = dump_loader.load('test/src/test-ctypes3.32.dump') - finder = memory_handler.get_heap_finder() - walker = finder.list_heap_walkers()[0] - heap_addr = walker.get_heap_address() - ctx = context.get_context_for_address(memory_handler, heap_addr) - lst = ctx._structures_addresses - # print ['0x%0.8x'%i for i in lst] - - -if __name__ == '__main__': - unittest.main(verbosity=0) diff --git a/test/haystack/search/test_api.py b/test/haystack/search/test_api.py index 7c376d49..11c7addd 100644 --- a/test/haystack/search/test_api.py +++ b/test/haystack/search/test_api.py @@ -12,6 +12,7 @@ from haystack.search import api from test.haystack import SrcTests + class TestFunction(unittest.TestCase): def test_outputs(self): with self.assertRaises(TypeError): @@ -21,6 +22,7 @@ def test_outputs(self): with self.assertRaises(TypeError): api.output_to_string(None, None) + class _ApiTest(SrcTests): """ Basic loading of a memory dump and offsets values for all tests. 
@@ -63,7 +65,7 @@ def tearDown(self): self.ctypes3 = None def test_search(self): - results = haystack.search_record(self.memory_handler, self.ctypes3.struct_test3) + results = api.search_record(self.memory_handler, self.ctypes3.struct_test3) # without constraints, struct_test3 could be mapped pretty much anywhere in x64 # all valid record addresses are in self.offsets valid = self.offsets['test1'] + self.offsets['test3'] @@ -75,7 +77,7 @@ def test_search_with_constraints(self): # now add some constraints to the search for struct_test3 handler = constraints.ConstraintsConfigHandler() my_constraints = handler.read('test/src/ctypes3.constraints') - results = haystack.search_record(self.memory_handler, self.ctypes3.struct_test3, my_constraints) + results = api.search_record(self.memory_handler, self.ctypes3.struct_test3, my_constraints) # all valid record addresses are in self.offsets valid = self.offsets['test3'] self.assertEqual(len(results), len(valid)) @@ -83,9 +85,9 @@ def test_search_with_constraints(self): self.assertIn(addr, valid) # search struct_Node with constraints - results = haystack.search_record(self.memory_handler, self.ctypes3.struct_Node, my_constraints) + results = api.search_record(self.memory_handler, self.ctypes3.struct_Node, my_constraints) # check the string output - out = haystack.output_to_string(self.memory_handler, results) + out = api.output_to_string(self.memory_handler, results) valid = self.offsets['test1'] self.assertEqual(len(results), len(valid)) for x in valid: @@ -97,7 +99,7 @@ def test_load(self): valid = self.offsets['test3'] for x in valid: - instance, validated = haystack.search.api.load_record(self.memory_handler, self.ctypes3.struct_test3, x) + instance, validated = api.load_record(self.memory_handler, self.ctypes3.struct_test3, x) self.assertTrue(validated) self.assertEqual(instance.val1, 0xdeadbeef) self.assertEqual(instance.val1b, 0xdeadbeef) @@ -107,7 +109,7 @@ def test_load(self): valid = self.offsets['test1'] for x in valid: - instance, validated = haystack.search.api.load_record(self.memory_handler, self.ctypes3.struct_Node, x) + instance, validated = api.load_record(self.memory_handler, self.ctypes3.struct_Node, x) self.assertTrue(validated) self.assertEqual(instance.val1, 0xdeadbeef) self.assertEqual(self.my_utils.get_pointee_address(instance.ptr1), x) @@ -119,7 +121,7 @@ class TestCTypes3_x32(TestCTypes3_x64): modulename = "test.src.ctypes3_gen32" def test_search(self): - results = haystack.search_record(self.memory_handler, self.ctypes3.struct_test3) + results = api.search_record(self.memory_handler, self.ctypes3.struct_test3) # without constraints, struct_test3 can only be mapped correctly to struct_test3.
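# Context for these assertions: an unconstrained search accepts any address
# where pointer fields land in mapped memory and every value fits its type,
# so small records over-match by chance. Constraints prune the candidates,
# as in the pattern used above (file path taken from this test module;
# memory_handler and ctypes3 stand in for the fixture attributes):
#
#   handler = constraints.ConstraintsConfigHandler()
#   my_constraints = handler.read('test/src/ctypes3.constraints')
#   results = api.search_record(memory_handler, ctypes3.struct_test3, my_constraints)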
# struct_node is too small in x32 valid = self.offsets['test3'] @@ -159,11 +161,11 @@ def tearDown(self): def test_refresh(self): #handler = constraints.ConstraintsConfigHandler() #my_constraints = handler.read('test/src/ctypes6.constraints') - #results = haystack.search_record(self.memory_handler, self.usual_structname, my_constraints) + #results = api.search_record(self.memory_handler, self.usual_structname, my_constraints) # search struct_usual with constraints - results, validated = haystack.search.api.load_record(self.memory_handler, self.usual, self.address1) + results, validated = api.load_record(self.memory_handler, self.usual, self.address1) # check the string output - retstr = haystack.output_to_string(self.memory_handler, [(results, validated)]) + retstr = api.output_to_string(self.memory_handler, [(results, validated)]) self.assertTrue(isinstance(retstr, str)) # string @@ -185,7 +187,7 @@ def test_refresh(self): #self.assertIn('"blink": 0x%0.8x' % node1_list_addr, retstr) # python - usuals = haystack.output_to_python(self.memory_handler, [(results, validated)]) + usuals = api.output_to_python(self.memory_handler, [(results, validated)]) usual, validated = usuals[0] self.assertEqual(validated, True) self.assertEqual(usual.val1, 0x0aaaaaaa) @@ -206,15 +208,15 @@ def test_refresh(self): self.assertIsNone(usual.root.blink.flink.flink) # python 2 struct Node - results, validated = haystack.search.api.load_record(self.memory_handler, self.node, self.address2) - node1s = haystack.output_to_python(self.memory_handler, [(results, validated)]) + results, validated = api.load_record(self.memory_handler, self.node, self.address2) + node1s = api.output_to_python(self.memory_handler, [(results, validated)]) node1, validated = node1s[0] self.assertEqual(validated, True) self.assertEqual(node1.val1, 0xdeadbeef) self.assertEqual(node1.val2, 0xffffffff) - results, validated = haystack.search.api.load_record(self.memory_handler, self.node, self.address3) - node2s = haystack.output_to_python(self.memory_handler, [(results, validated)]) + results, validated = api.load_record(self.memory_handler, self.node, self.address3) + node2s = api.output_to_python(self.memory_handler, [(results, validated)]) node2, validated = node2s[0] self.assertEqual(validated, True) self.assertEqual(node2.val1, 0xdeadbabe) @@ -231,7 +233,7 @@ def test_refresh(self): def test_search(self): handler = constraints.ConstraintsConfigHandler() my_constraints = handler.read('test/src/ctypes6.constraints') - results = haystack.search_record(self.memory_handler, self.node, my_constraints) + results = api.search_record(self.memory_handler, self.node, my_constraints) # 2 from test1 # 3 from test_pointer_to_list # the rest have bad values in constrained fields @@ -245,7 +247,7 @@ # FIXME there is a circular reference in json.
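# The circular reference mentioned above: doubly linked LIST_ENTRY records
# (usual.root.blink/flink) can point back at themselves, and the stdlib
# json encoder refuses cycles. A minimal reproduction with plain dicts:
#
#   import json
#   a = {}
#   a['self'] = a
#   json.dumps(a)   # raises ValueError: Circular reference detected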
#with self.assertRaises(ValueError): - # haystack.output_to_json(self.memory_handler, results) + # api.output_to_json(self.memory_handler, results) #self.assertEqual(node2s['val1'], 0xdeadbabe) #self.assertEqual(node2s['val2'], 0xffffffff) model = self.memory_handler.get_model() diff --git a/test/haystack/search/test_searcher.py b/test/haystack/search/test_searcher.py index 0797c8c4..7330570e 100644 --- a/test/haystack/search/test_searcher.py +++ b/test/haystack/search/test_searcher.py @@ -9,7 +9,6 @@ from haystack.search import searcher - class TestApiWin32Dump(unittest.TestCase): """ test if the API works for windows @@ -54,7 +53,7 @@ def test_load(self): my_loader = searcher.RecordLoader(self.memory_handler) res = my_loader.load(heapwalker._heap_module.HEAP, self.known_heaps[0][0]) - res_p = haystack.output_to_python(self.memory_handler, [res]) + res_p = api.output_to_python(self.memory_handler, [res]) instance, validated = res_p[0] # no constraints loaded, subsegmentcode pointer went to is_valid self.assertFalse(validated) @@ -62,7 +61,7 @@ def test_load(self): # now lets just use the win7heap constraints my_loader = searcher.RecordLoader(self.memory_handler, heapwalker._heap_module_constraints) res = my_loader.load(heapwalker._heap_module.HEAP, self.known_heaps[0][0]) - res_p = haystack.output_to_python(self.memory_handler, [res]) + res_p = api.output_to_python(self.memory_handler, [res]) instance, validated = res_p[0] # no constraints loaded, subsegmentcode pointer went to is_valid self.assertTrue(validated) @@ -73,7 +72,7 @@ def test_load(self): # try a misalign read res = my_loader.load(heapwalker._heap_module.HEAP, self.known_heaps[0][0] + 1) - res_p = haystack.output_to_python(self.memory_handler, [res]) + res_p = api.output_to_python(self.memory_handler, [res]) instance, validated = res_p[0] self.assertFalse(validated) self.assertIsInstance(instance, object) @@ -85,7 +84,6 @@ def test_load(self): return - if __name__ == '__main__': logging.basicConfig(level=logging.INFO) # logging.getLogger('searcher').setLevel(logging.DEBUG)
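For reference, the load-and-convert pattern these last hunks settle on, assuming a loaded memory_handler plus a walker exposing its heap module and constraints (names as used in the test above; heap_address is a placeholder):

    my_loader = searcher.RecordLoader(memory_handler, heapwalker._heap_module_constraints)
    res = my_loader.load(heapwalker._heap_module.HEAP, heap_address)
    instance, validated = api.output_to_python(memory_handler, [res])[0]
    # validated is False when constraint checks fail, e.g. on a misaligned read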