Skip to content

Commit

Permalink
gbeast: Use the -resume option if a .trees file is already in input directory.
Browse files Browse the repository at this point in the history
  • Loading branch information
riccardomurri committed Nov 23, 2016
1 parent 9bd8eee commit eaa645f
Showing 1 changed file with 81 additions and 30 deletions.
111 changes: 81 additions & 30 deletions gc3apps/irm.uzh.ch/gbeast.py
@@ -1,8 +1,9 @@
#! /usr/bin/env python
#
"""
Run the BEAST or BEAST-2 programs from a prepared tree of input files.
"""
# Copyright (C) 2012-2013, GC3, University of Zurich. All rights reserved.
# Copyright (C) 2012-2016 University of Zurich. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
Expand All @@ -22,73 +23,123 @@
__version__ = '$Revision$'


import os


import gc3libs
from gc3libs.cmdline import SessionBasedScript
from gc3libs.quantity import MiB, GiB
import os
from gc3libs.quantity import MiB, GB

class GBeastApp(gc3libs.Application):
def __init__(self, beast, jarfile, dirname, fname, ncores, **extra):
infile = os.path.join(dirname, fname)

extra['output_dir'] = fname[:-4] + '.d'
def find_files(dirname, suffix=None):
    """
    Return a sorted list of (full paths to) files in the given directory.

    Only regular files are returned; sub-directories and other
    non-file entries are left out.  If `suffix` is given, restrict to
    files whose name ends with it (anything accepted by `str.endswith`
    works, so a tuple of suffixes may be passed as well).

    The result is sorted because `os.listdir` returns entries in
    arbitrary order, and callers in this file pick "the first" match:
    sorting makes that choice reproducible across runs and file systems.
    """
    result = []
    for entry in sorted(os.listdir(dirname)):
        if suffix and not entry.endswith(suffix):
            continue
        path = os.path.join(dirname, entry)
        if os.path.isfile(path):
            result.append(path)
    return result


class GBeastApp(gc3libs.Application):
    """
    Run BEAST on the `.xml` file found in one input directory.

    If the directory already contains a `.trees` file, the run is
    started with BEAST's ``-resume`` option, and the seed encoded in
    the `.trees` file name is passed on with ``-seed``.
    """

    def __init__(self, jarfile, dirname, ncores, **extra):
        """
        Build the `java -jar ...` command line for one input directory.

        :param jarfile: path to the BEAST `.jar` file to execute.
        :param dirname: directory holding the `.xml` input file (and,
          optionally, a `.trees` file from a previous run).
        :param ncores: number of cores to request; also used for the
          ``-threads`` and ``-beagle_instances`` options, and to scale
          the requested memory (4 GB per core).
        :param extra: additional keyword arguments, passed unchanged
          to `gc3libs.Application`.

        :raise ValueError: if `dirname` contains no `.xml` file, or
          if the seed cannot be parsed out of the `.trees` file name.
        """
        # `abspath` also strips trailing separators, so that `some/dir/`
        # yields `dir` instead of an empty output directory name
        extra['output_dir'] = os.path.basename(os.path.abspath(dirname))
        extra['requested_cores'] = ncores
        extra['requested_memory'] = ncores*4*GB

        inputs = find_files(dirname)

        # sort so that "the first" file is the same one on every run
        xmls = sorted(path for path in inputs if path.endswith('.xml'))
        if not xmls:
            raise ValueError(
                "No `.xml` file found in directory `{0}`".format(dirname))
        xml = xmls[0]

        treefiles = sorted(path for path in inputs if path.endswith('.trees'))
        if treefiles:
            # Natasha's config places seed in .log/.trees file name
            self.seed = self._seed_from_filename(treefiles[0])
            self.resume = True
        else:
            self.seed = None
            self.resume = False

        # build command-line
        args = [
            'java',
            '-Xmx{mem_mb:d}m'.format(
                mem_mb=extra['requested_memory'].amount(MiB)),
            '-jar', jarfile,
            '-threads', ncores,
            '-beagle_instances', ncores,
        ]
        if self.resume:
            args += ['-resume']
        # compare with `None`: a seed of 0 is falsy but still a seed
        if self.seed is not None:
            args += ['-seed', self.seed]
        # NOTE(review): this assumes input files are staged into the
        # job's working directory under their base name -- confirm
        # against `gc3libs.Application` documentation.
        args += [os.path.basename(xml)]

        # NOTE(review): only the `.xml` file is listed as input; when
        # resuming, BEAST presumably also needs the existing `.trees`
        # (and related) files on the execution side -- TODO confirm.
        gc3libs.Application.__init__(
            self,
            arguments = args,
            inputs = [xml],
            outputs = gc3libs.ANY_OUTPUT,
            stdout = 'stdout.txt',
            stderr = 'stderr.txt',
            **extra)

    @staticmethod
    def _seed_from_filename(path):
        """
        Return the integer after the last `_` in the file name of `path`.

        Only the base name without its final extension is examined,
        so dots or underscores in directory names do not interfere.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem.rsplit('_', 1)[-1])


class GBeastScript(SessionBasedScript):
    """
    Script to parallelize execution of BEAST over multiple input files.
    Takes a list of directories containing XML files and processes them all.
    If a `.trees` file is already present in the directory, BEAST is passed
    the ``-resume`` option to append to existing files instead of starting over.
    """
    version = '1.1'

    def setup_options(self):
        """Declare command-line options and positional arguments."""
        self.add_param('-b', '--beast', choices=['beast1', 'beast2'],
                       help='Beast version to run')
        self.add_param('--beast1', default='/apps/BEASTv1.8.2/lib/beast.jar', help='Path to BEAST v1 jar file')
        self.add_param('--beast2', default='/apps/BEASTv2.3.2/lib/beast.jar', help='Path to BEAST v2 jar file')
        self.add_param('path', nargs='+', help='Directory containing XML input files for beast')
        self.add_param('--cores', default=1, type=int, help="Amount of cores to use. Default: %(default)s")

    def parse_args(self):
        """
        Check that every positional argument is an existing directory.

        :raise gc3libs.exceptions.InvalidUsage: on the first argument
          that is not a directory.
        """
        # `path` is declared with `nargs='+'`, so it is a list of
        # names: each one has to be checked on its own
        for dirname in self.params.path:
            if not os.path.isdir(dirname):
                raise gc3libs.exceptions.InvalidUsage(
                    "{0} is not a directory".format(dirname))

    def new_tasks(self, extra):
        """
        Return one `GBeastApp` task per usable input directory.

        The BEAST version comes from the ``--beast`` option if given;
        otherwise it is guessed from the presence of ``BEAST1`` or
        ``BEAST2`` in the path of the directory's `.xml` file.
        Directories for which no task can be built are reported via
        the script's logger and skipped.
        """
        tasks = []
        for dirname in self.params.path:
            xmls = find_files(dirname, '.xml')
            if not xmls:
                # nothing to run here; indexing `xmls[0]` would fail
                self.log.error(
                    "No `.xml` file in directory `{dirname}`."
                    " Skipping it."
                    .format(dirname=dirname))
                continue
            if self.params.beast:
                beast = self.params.beast
            else:
                if len(xmls) > 1:
                    self.log.error(
                        "More than 1 `.xml` file in directory `{dirname}`: {xmls!r}"
                        " Skipping it."
                        .format(dirname=dirname, xmls=xmls))
                    continue
                xml = xmls[0]
                if 'BEAST1' in xml:
                    beast = 'beast1'
                elif 'BEAST2' in xml:
                    beast = 'beast2'
                else:
                    # use the same logger as the other error report above
                    self.log.error(
                        "Unable to guess which version of BEAST you want to run."
                        " Skipping file `{xml}`"
                        .format(xml=xml))
                    continue
            jarfile = (self.params.beast1 if beast == 'beast1' else self.params.beast2)
            tasks.append(GBeastApp(jarfile,
                                   dirname,
                                   self.params.cores,
                                   **extra.copy()))
        return tasks

## main
Expand Down

0 comments on commit eaa645f

Please sign in to comment.