Skip to content

Commit

Permalink
ex-264 (cgates): Adjusted to work in py3; minor PEP8 cleanup; removed…
Browse files Browse the repository at this point in the history
… some deprecated methods
  • Loading branch information
cgates committed May 23, 2015
1 parent 7473edb commit 4632591
Show file tree
Hide file tree
Showing 13 changed files with 42 additions and 68 deletions.
4 changes: 3 additions & 1 deletion jacquard/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ def execute(args, dummy_execution_context):
#for the moment, there is no good place to put the execution context
input_file = os.path.abspath(args.input)
output_file = os.path.abspath(args.output)
col_spec = args.selected_columns_file if args.selected_columns_file else None
col_spec = None
if args.selected_columns_file:
col_spec = args.selected_columns_file

logger.debug("Expanding [{}] to [{}]",
input_file,
Expand Down
4 changes: 2 additions & 2 deletions jacquard/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ def add_subparser(subparser):
parser.add_argument("-v", "--verbose", action='store_true')

#TODO (cgates): This module is both a command and also manipulates VcfRecords
# like a caller. This is the only body of code that does both these things.
# Does this bother anyone else?
# like a caller transform. This is the only body of code that does both these
# things. Does this bother anyone else?
def execute(args, execution_context):
validate_args(args)

Expand Down
1 change: 1 addition & 0 deletions jacquard/utils/summarize_rollup_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def add_tag_values(vcf_record):
_add_sample_count_values(vcf_record, JQ_PASSED, JQ_SAMPLES_PASSED)

class _HCGenotypeTag(object):
#pylint: disable=too-few-public-methods
_TAG_ID = "{}HC_GT".format(JQ_SUMMARY_TAG)
_PATTERN = re.compile("^JQ_.*_GT$")

Expand Down
2 changes: 1 addition & 1 deletion jacquard/utils/summarize_zscore_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _get_dependent_value(tag_values, dependent_tag_id):
return None

def _init_population_stats(self, vcf_reader, dependent_tag_id):
'''Derive mean and stdev.
'''Derive mean and stdev.
Adapted from online variance algorithm from Knuth, The Art of Computer
Programming, volume 2
Expand Down
2 changes: 2 additions & 0 deletions jacquard/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def sort_metaheaders(metaheaders):
.split("=")[0]])

class _JacquardHelpFormatter(argparse.RawTextHelpFormatter):
#pylint: disable=too-few-public-methods
def _format_usage(self, default_values):
prog = '%(prog)s' % dict(prog=self._prog)
usage = 'usage: {} <input> <output> {}'.format(prog, default_values)
Expand All @@ -48,6 +49,7 @@ def _format_usage(self, default_values):
def add_usage(self, default_values, actions=None, groups=None, prefix=None):
self._add_item(self._format_usage, default_values)


class JQException(Exception):
"""Base class for all run-time exceptions in this module."""
def __init__(self, msg, *args):
Expand Down
14 changes: 5 additions & 9 deletions jacquard/variant_caller_transforms/common_tags.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Common tags used by several callers."""
from __future__ import print_function, absolute_import, division
import abc
from abc import abstractmethod
import jacquard.utils.utils as utils

CALLER_REPORTED_TAG = "CALLER_REPORTED"
Expand All @@ -24,24 +23,21 @@ def __init__(self, abbreviation, vcf_type, vcf_number):
GENOTYPE_TAG = _TagType("GT", "String", "1")
ALLELE_FREQ_TAG = _TagType("AF", "Float", "A")
SOMATIC_TAG = _TagType("HC_SOM", "Integer", "1")

def __init__(self, variant_caller_abbrev, tag_type, description):
if '"' in description:
raise utils.JQException(("Metaheader descriptions cannot contain "
"double quotes: [{}]"),
description)
self.tag_id = "JQ_{}_{}".format(variant_caller_abbrev,tag_type.abbreviation)

self.tag_id = "JQ_{}_{}".format(variant_caller_abbrev,
tag_type.abbreviation)
self.metaheader = JacquardTag.FORMAT.format(self.tag_id,
tag_type.vcf_number,
tag_type.vcf_type,
description)



@abstractmethod
def add_tag_values(self):
pass
def add_tag_values(self, vcf_record):
raise NotImplementedError()


class ReportedTag(object):
Expand Down
13 changes: 4 additions & 9 deletions jacquard/variant_caller_transforms/mutect.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,15 @@ def add_tag_values(self, vcf_record):
vcf_record.add_sample_tag_value(self.tag_id, sample_values)

class _SomaticTag(common_tags.JacquardTag):
_DESCRIPTION = (\
'''Jacquard somatic status for MuTect: 0=non-somatic,1=somatic (based on SS
FORMAT tag)''').replace("\n","")
#pylint: disable=too-few-public-methods
def __init__(self):
super(self.__class__,
self).__init__(MUTECT_ABBREVIATION,
common_tags.JacquardTag.SOMATIC_TAG,
'Jacquard somatic status for MuTect: 0=non-somatic,1=somatic (based on SS FORMAT tag)')
self._DESCRIPTION)

def add_tag_values(self, vcf_record):
sample_values = {}
Expand Down Expand Up @@ -117,14 +120,6 @@ def __init__(self):
self.name = "MuTect"
self.abbr = "MT"

##TODO (cgates): deprecate; remove
@staticmethod
def validate_input_file(meta_headers, dummy_column_header):
for line in meta_headers:
if line.startswith(Mutect._MUTECT_METAHEADER_PREFIX):
return True
return False

@staticmethod
def _is_mutect_vcf(file_reader):
if file_reader.file_name.lower().endswith(".vcf"):
Expand Down
7 changes: 2 additions & 5 deletions jacquard/variant_caller_transforms/strelka.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
VERSION = "v2.0.15"

class _GenotypeTag(common_tags.JacquardTag):
#pylint: disable=too-few-public-methods

_INDEL_VALUES = ["ref", "hom", "het"]

def __init__(self):
Expand Down Expand Up @@ -231,11 +233,6 @@ def __init__(self):
self.abbr = "SK"
self.meta_header = "##jacquard.normalize_strelka.sources={0},{1}\n"

##TODO (cgates): deprecated; remove
@staticmethod
def validate_input_file(meta_headers, dummy_column_header):
return "##source=strelka" in meta_headers

@staticmethod
def _is_strelka_vcf(file_reader):
if file_reader.file_name.endswith(".vcf"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"Strelka": strelka.VERSION}

class VariantCallerFactory(object):
#pylint: disable=too-few-public-methods
def __init__(self, args=None):
self._callers = [varscan.Varscan(args),
strelka.Strelka(),
Expand Down
16 changes: 2 additions & 14 deletions jacquard/variant_caller_transforms/varscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,6 @@ def _get_hc_file_pattern(args):

return compiled_regex

##TODO (cgates): deprecated; remove
@staticmethod
def validate_input_file(meta_headers, column_header):
if "##source=VarScan2" not in meta_headers:
return 0

if _VARSCAN_SOMATIC_HEADER == column_header:
return 1
else:
raise utils.JQException("Unexpected VarScan VCF structure - "
"missing NORMAL and TUMOR headers.")

@staticmethod
def _validate_filter_file(file_reader):
column_header = 0
Expand Down Expand Up @@ -394,7 +382,7 @@ def _create_vcf_readers(pair_tuples):

#pylint: disable=too-many-locals
def claim(self, file_readers):
"""Recognizes and claims MuTect VCFs from the set of all input VCFs.
"""Recognizes and claims VarScan VCFs from the set of all input VCFs.
Each defined caller has a chance to evaluate and claim all the incoming
files as something that it can process. Since VarScan can claim
Expand All @@ -405,7 +393,7 @@ def claim(self, file_readers):
file_readers: the collection of currently unclaimed files
Returns:
A tuple of unclaimed readers and MuTectVcfReaders.
A tuple of unclaimed readers and VarScanVcfReaders.
"""

(prefix_to_readers,
Expand Down
22 changes: 11 additions & 11 deletions test/utils/test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def assertCommand(self, command, expected_dir):

output = command[2]

#try:
if os.path.isfile(output):
output_file = os.path.basename(output)
self._compare_files(output, output_file, expected_dir)
elif os.path.isdir(output):
for output_file in os.listdir(output):
new_output = os.path.join(output, output_file)
self._compare_files(new_output, output_file, expected_dir)
#except self.failureException as e:
# msg = "discrepancy in command [{}]: {}".format(" ".join(command), e)
# raise self.failureException(msg)
try:
if os.path.isfile(output):
output_file = os.path.basename(output)
self._compare_files(output, output_file, expected_dir)
elif os.path.isdir(output):
for output_file in os.listdir(output):
new_output = os.path.join(output, output_file)
self._compare_files(new_output, output_file, expected_dir)
except self.failureException as e:
msg = "discrepancy in command [{}]: {}".format(" ".join(command), e)
raise self.failureException(msg)

def entab(self, string, old="|"):
    """Return a copy of ``string`` with each ``old`` delimiter turned into a tab.

    Lets tests spell tab-separated records with a visible placeholder
    character (``|`` by default) instead of literal tab characters.
    """
    tab = "\t"
    entabbed = string.replace(old, tab)
    return entabbed
16 changes: 8 additions & 8 deletions test/variant_caller_transforms/common_tags_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ def test_somatic_tag(self):
self.assertEquals("Integer", tag.vcf_type)
self.assertEquals("1", tag.vcf_number)

def test_add_tag_values_is_abstract(self):
try:
class FakeTag(common_tags.JacquardTag):
def __init__(self): pass
FakeTag()
self.fail(("Should not be able to instantiate JacquardTag without "
"overrideing abstract methods."))
except TypeError: pass
def test_add_tag_values_raisesNotImplementedError(self):
class FakeTag(common_tags.JacquardTag):
def __init__(self): pass
tag = FakeTag()
self.assertRaises(NotImplementedError,
tag.add_tag_values,
VcfRecord("1", "42", "A", "C")
)

def test_metaheader(self):
tag_type = common_tags.JacquardTag.GENOTYPE_TAG
Expand Down
8 changes: 0 additions & 8 deletions test/variant_caller_transforms/mutect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,6 @@ def setUp(self):
super(MutectTestCase, self).setUp()
self.caller = mutect.Mutect()

def test_validateInputFile_isValid(self):
metaheaders = ["##MuTect=blah"]
self.assertTrue(self.caller.validate_input_file(metaheaders, "#column_header"))

def test_validateInputFile_isNotValid(self):
metaheaders = ["Foo"]
self.assertFalse(self.caller.validate_input_file(metaheaders, "#column_header"))

def test_claim(self):
record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
content1 = ["##foo", "##source=strelka", "#chrom", record1]
Expand Down

0 comments on commit 4632591

Please sign in to comment.