In [2]:
from pprint import pprint
import os

# Read results files

In [27]:
# assumes we're in {app}-{version}/ with
# {system_name}-{compute_instance_type}/{compiler_family}-{mpi_family}/runs/{run_label}/run.json
#
# could probably do with abstracting this out - everything will need it!

def fs_tree(root='.'):
    """ Return a nested dict of the filetree below `root`.
    
        Directory names (relative to `root`) are represented as keys.
        File names in each directory are available under the key "./".
        
        Files sitting directly in `root` itself are not recorded, and hidden
        directories (names starting with '.') are neither listed nor descended into.
    """
    results = {}
    for (dirpath, dirnames, filenames) in os.walk(root):
        # In-place filter of dirnames so os.walk skips hidden directories.
        # Slice assignment is used because deleting entries while enumerating
        # the same list skips the element following each deletion.
        dirnames[:] = [d for d in dirnames if not d.startswith('.')]
        # Key the tree by the path relative to `root`, so the result has the
        # same shape whatever `root` is (not only for root='.').
        relpath = os.path.relpath(dirpath, root)
        if relpath == os.curdir:
            continue  # the root directory itself gets no entry
        curr = results
        for p in relpath.split(os.sep):
            # Parents are visited first (top-down walk), so only the final
            # component is actually created here, with this directory's files.
            curr = curr.setdefault(p, {'./':filenames})
    return results


# TODO: separate out part characterising paths into labels and finding filenames
def get_run_outputs(root='.', ext='.out'):
    """ Find {run-label} directories and return output files with given extension.
    
        `root` should be the path to the {app}-{version}/ directory.
        
        Returns a sequence of `(path, labels)` tuples where:
            `path`: path to file, joined from the directory keys of `fs_tree(root)`
                    (i.e. relative to `root` when `root` is '.')
            `labels`: a dict with keys:
                'system_name'
                'compute_instance_type'
                'compiler_family'
                'mpi_family'
                'run_label'
                'filename'
        
        Labels are taken from the two directories immediately enclosing `runs/`,
        each split on '-'. An IndexError is raised if a matching file is found
        under a `runs/` directory with fewer than two enclosing directories, or
        if one of those directory names contains no '-'.
    """
    ftree = fs_tree(root)
    stack = [([], ftree)]
    results = []
    while stack:
        parts, curr = stack.pop()
        if 'runs' not in curr:
            # Not at a runs/ level yet: keep descending into subdirectories.
            for k, v in curr.items():
                if k != './':
                    stack.append((parts + [k], v))
            continue
        for rundir, contents in curr['runs'].items():
            if rundir == './':
                continue  # files directly in runs/ are not run outputs
            for runfile in contents['./']:
                if os.path.splitext(runfile)[-1] != ext:
                    continue
                fpath = os.path.join(*(parts + ['runs', rundir, runfile]))
                # Use the directories next to runs/ rather than the first two
                # path components, so extra leading components cannot shift
                # the labels onto the wrong directories.
                system_parts = parts[-2].split('-')
                toolchain_parts = parts[-1].split('-')
                # NOTE(review): only the first two '-'-separated fields are kept;
                # confirm names never contain additional hyphens.
                labels = {'system_name':system_parts[0],
                          'compute_instance_type':system_parts[1],
                          'compiler_family':toolchain_parts[0],
                          'mpi_family':toolchain_parts[1],
                          'run_label':rundir,
                          'filename':runfile,
                         }
                results.append((fpath, labels))
                    
    return results
            
# Collect every '.out' file found under the current directory, with its labels.
run_outs = get_run_outputs(root='.', ext='.out')


sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-13.out :
{'compiler_family': 'gnu8',
 'compute_instance_type': 'hotdog',
 'filename': 'slurm-13.out',
 'mpi_family': 'openmpi3',
 'run_label': 'N2npn1',
 'system_name': 'sausage'}
sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-14.out :
{'compiler_family': 'gnu8',
 'compute_instance_type': 'hotdog',
 'filename': 'slurm-14.out',
 'mpi_family': 'openmpi3',
 'run_label': 'N2npn1',
 'system_name': 'sausage'}
sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-15.out :
{'compiler_family': 'gnu8',
 'compute_instance_type': 'hotdog',
 'filename': 'slurm-15.out',
 'mpi_family': 'openmpi3',
 'run_label': 'N2npn1',
 'system_name': 'sausage'}
sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-16.out :
{'compiler_family': 'gnu8',
 'compute_instance_type': 'hotdog',
 'filename': 'slurm-16.out',
 'mpi_family': 'openmpi3',
 'run_label': 'N2npn1',
 'system_name': 'sausage'}


In [29]:
def _imb_number(text):
    """ Convert a table cell to `int` when possible, otherwise to `float`. """
    try:
        return int(text)
    except ValueError:
        return float(text)


def read_imb_out(path):
    """ Parse benchmark result tables from the output file at `path`.
    
        (Presumably Intel MPI Benchmarks output, going by the function name.)
        
        A table is recognised by a line starting with '# Benchmarking ', followed by
        a '#processes' line, one line which is skipped, a header row whose first
        column starts with '#', and then data rows. A blank line or the end of the
        file ends the table.
        
        Returns a dict keyed by benchmark name, each value being a dict with keys:
            'name': the benchmark name
            'params': {'processes': <last field of the #processes line, as a string,
                                     or None if the file ends before that line>}
            'data': {column label: list of values}, one entry per header column
        
        Columns of 'Uniband' and 'Biband' are converted using fixed types; for any
        other benchmark each value becomes an `int` if it parses as one, else a `float`.
        
        Raises ValueError if a data row appears before its table's header row, or
        if a value cannot be converted.
    """
    COL_TYPES = {'Uniband':(int, int, float, int),
                 'Biband':(int, int, float, int),
                }
    results = {}
    with open(path) as f:
        for line in f:
            if not line.startswith('# Benchmarking '):
                continue
            benchmark = line.split()[-1]
            # next() is given a default throughout so that a file ending
            # mid-table ends the table instead of raising StopIteration.
            proc_fields = next(f, '').split() # "# #processes = 2"
            processes = proc_fields[-1] if proc_fields else None
            result = {'name':benchmark, 'data':{}, 'params':{'processes':processes}}
            results[benchmark] = result
            next(f, None) # skip the line following the #processes line
            col_types = COL_TYPES.get(benchmark)
            header = None # reset per table so a stale header is never reused
            while True:
                cols = next(f, '').split()
                if not cols: # blank line or end of file
                    break
                if cols[0].startswith('#'): # header row
                    header = cols
                    for label in header:
                        result['data'][label] = []
                    continue
                if header is None:
                    raise ValueError('data row before header row for benchmark %r in %s'
                                     % (benchmark, path))
                converters = col_types
                if converters is None:
                    # Unknown benchmark: infer int/float per value.
                    converters = [_imb_number] * len(cols)
                for label, opr, value in zip(header, converters, cols):
                    result['data'][label].append(opr(value))
    return results

# Parse every discovered output file and dump the result for inspection.
for path, labels in run_outs:
    # A single pre-formatted string prints identically under Python 2 and 3
    # (the bare `print 'path:', path` statement is a SyntaxError on Python 3).
    print('path: %s' % path)
    pprint(read_imb_out(path))



path: sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-13.out
{}
path: sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-14.out
{}
path: sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-15.out
{}
path: sausage-hotdog/gnu8-openmpi3/runs/N2npn1/slurm-16.out
{'Biband': {'data': {'#bytes': [0,
                                1,
                                2,
                                4,
                                8,
                                16,
                                32,
                                64,
                                128,
                                256,
                                512,
                                1024,
                                2048,
                                4096,
                                8192,
                                16384,
                                32768,
                                65536,
                                131072,
                                262144,
        

In [None]:
# NOTE(review): `results` is not assigned at notebook level in the cells visible here
# (it only appears as a local name inside the functions) — confirm what this should print.
pprint(results)