Skip to content

Commit

Permalink
JQ-324 (cgates): pad trailing format field values; improved unit test…
Browse files Browse the repository at this point in the history
…s of common_tags.py
  • Loading branch information
cgates committed May 28, 2019
1 parent 83dd61d commit fb8b6ca
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 23 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

1.1.2 (5/25/2019)
-----------------
- Adjusted VCF handling to correctly pad optional trailing FORMAT fields

1.1.1 (10/30/2018)
------------------
- Adjusted Mutect translators to:
Expand Down
2 changes: 1 addition & 1 deletion jacquard/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.1"
__version__ = "1.1.2"
36 changes: 27 additions & 9 deletions jacquard/utils/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,36 +332,53 @@ def get_empty_record(self):
ref=self.ref,
alt=self.alt)

def _format_field(self):
"""Returns string representation of format field."""
def _format_tag_fields(self):
"""Returns list of format tag names."""
tag_names = []
format_field = "."
if self.sample_tag_values:
first_sample = list(self.sample_tag_values.keys())[0]
tag_names = self.sample_tag_values[first_sample].keys()
if tag_names:
format_field = ":".join(tag_names)
return format_field
return tag_names

def _sample_field(self, sample):
"""Returns string representation of sample-format values.
def _sample_field(self, tag_names, sample):
"""Returns string representation of sample-format values ordered by tag_names.
Tag_names must be superset of sample_tag_values.
Missing sample_tag_values padded as '.'.
If tag_names empty, returns '.'.
Raises:
KeyError: if requested sample is not defined.
ValueError if sample_tag_values has more keys than tag_names.
"""
tag_values = self.sample_tag_values[sample].values()
sample_tag_values = self.sample_tag_values[sample]
missing_tag_names = set(sample_tag_values) - set(tag_names)
if missing_tag_names:
msg = ('sample format tags are not consistent: '
'{}:{}:{}:{}').format(self.chrom,
self.pos,
self.ref,
self.alt)
raise ValueError(msg)
tag_values = [sample_tag_values.get(t, '.') for t in tag_names]
if tag_values:
return ":".join(tag_values)
else:
return "."

def text(self):
"Returns tab-delimited, newline terminated string of VcfRecord."
tag_names = self._format_tag_fields()
format_field = '.' if not tag_names else ':'.join(tag_names)

stringifier = [self.chrom, self.pos, self.vcf_id, self.ref, self.alt,
self.qual, self.filter, self.info,
self._format_field()]
format_field]

for sample in self.sample_tag_values:
stringifier.append(self._sample_field(sample))
stringifier.append(self._sample_field(tag_names, sample))

return "\t".join(stringifier) + "\n"

Expand Down Expand Up @@ -398,6 +415,8 @@ def add_or_replace_filter(self, new_filter):
self.filter = ";".join([self.filter,
new_filter])

#TODO: cgates: This is not a good equals method. Please adjust this to
# consider the full set of instance variables.
def __eq__(self, other):
    """Returns True when other is a VcfRecord with an equal _key.

    Only _key is compared; no other instance variable is consulted.
    """
    if not isinstance(other, VcfRecord):
        return False
    return self._key == other._key

Expand Down Expand Up @@ -469,4 +488,3 @@ def __hash__(self):
def __lt__(self, other):
    """Orders instances by the natsort key of their file_name."""
    natural_key = natsort.natsort_keygen()
    own_key = natural_key(self.file_name)
    other_key = natural_key(other.file_name)
    return own_key < other_key

2 changes: 1 addition & 1 deletion jacquard/variant_caller_transforms/common_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class ReportedTag(AbstractJacquardTag):
it explicitly simplifies how summary tags are generated.
"""
#pylint: disable=too-few-public-methods

def __init__(self, caller_abbreviation):
super(self.__class__,
self).__init__(caller_abbreviation,
Expand Down
2 changes: 1 addition & 1 deletion test/jacquard_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_error_raisesTransformedMessage(self):

class JacquardTestCase(test_case.JacquardBaseTestCase):
def test_version(self):
self.assertEquals("1.1.1", jacquard.__version__)
self.assertEquals("1.1.2", jacquard.__version__)

def test_get_execution_context(self):
command = "foo input_dir output_dir"
Expand Down
15 changes: 12 additions & 3 deletions test/utils/vcf_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,20 +367,20 @@ def test_format_field(self):
sample_names = ["SA", "SB"]
input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
record = VcfRecord.parse_record(input_line, sample_names)
self.assertEquals("F3:F1:F2", record._format_field())
self.assertEquals(["F3", "F1", "F2"], list(record._format_tag_fields()))

def test_format_field_emptyWhenNoSamples(self):
input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO\n")
record = VcfRecord.parse_record(input_line, [])
self.assertEquals(".", record._format_field())
self.assertEquals([], record._format_tag_fields())

def test_format_field_preservesOrderWhenAddingNewTags(self):
sample_names = ["SA", "SB"]
input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
record = VcfRecord.parse_record(input_line, sample_names)
record.add_sample_tag_value("Z4", {"SA" : "SA.4", "SB" : "SB.4"})
record.add_sample_tag_value("A5", {"SA" :"SA.A5", "SB" : "SB.A5"})
self.assertEquals("F3:F1:F2:Z4:A5", record._format_field())
self.assertEquals(["F3", "F1", "F2", "Z4", "A5"], list(record._format_tag_fields()))

def test_parse_record_sample_dict(self):
sample_names = ["SampleA", "SampleB"]
Expand Down Expand Up @@ -524,6 +524,15 @@ def test_asTextWhenEmptyFormatField(self):
expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
self.assertEquals(expected, record.text())

def test_asTextExpandsEmptyTrailingFormatField(self):
    """text() pads a sample that lacks a trailing format tag with '.'."""
    # Build each OrderedDict from (key, value) pairs rather than a dict
    # literal: a dict literal only keeps insertion order on Python 3.7+,
    # and this test asserts an exact tag and sample column order.
    sampleA = OrderedDict([('a', '1'), ('b', '2')])
    sampleB = OrderedDict([('a', '10')])
    sample_tag_values = OrderedDict([("SampleA", sampleA),
                                     ("SampleB", sampleB)])
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    # SampleB has no 'b' value, so its last field is expected to be '.'.
    expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|a:b|1:2|10:.\n")
    self.assertEquals(expected, record.text())


def test_equals(self):
sample_names = ["sampleA"]
base = VcfRecord.parse_record(self.entab("A|1|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
Expand Down
26 changes: 18 additions & 8 deletions test/variant_caller_transforms/common_tags_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,14 @@ def test_reported_tag_metaheader(self):
reported_tag.metaheader)

def test_reported_tag_format(self):
reported_tag = common_tags.ReportedTag("foo_")
reported_tag = common_tags.ReportedTag("foo")
actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:foo_CALLER_REPORTED"
"|SA.1:SA.2:1|SB.1:SB.2\n")
expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:JQ_foo_CALLER_REPORTED"
"|SA.1:SA.2:1|SB.1:SB.2:1\n")
expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
reported_tag.add_tag_values(actual_vcf_record)
self.assertEquals(expected_vcf_record, actual_vcf_record)
self.assertEquals(expected_vcf_record.text(), actual_vcf_record.text())

def test_passed_tag_metaheader(self):
passed_tag = common_tags.PassedTag("foo")
Expand All @@ -176,12 +176,22 @@ def test_passed_tag_metaheader(self):
'original VCF">').format("JQ_foo_CALLER_PASSED"),
passed_tag.metaheader)

def test_passed_tag_format(self):
def test_passed_tag_format_noPass(self):
passed_tag = common_tags.PassedTag("foo")
actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:foo_CALLER_REPORTED"
"|SA.1:SA.2:1|SB.1:SB.2\n")
expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:JQ_foo_CALLER_PASSED"
"|SA.1:SA.2:0|SB.1:SB.2:0\n")
expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
passed_tag.add_tag_values(actual_vcf_record)
self.assertEquals(expected_vcf_record.text(), actual_vcf_record.text())

def test_passed_tag_format_pass(self):
    """PassedTag marks every sample with 1 when the FILTER column is PASS.

    Named distinctly from test_passed_tag_format_noPass: two methods with
    the same name in one class body leave only the last one defined, so
    the non-PASS case would silently never run.
    """
    passed_tag = common_tags.PassedTag("foo")
    actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
    actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
    expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2:JQ_foo_CALLER_PASSED"
                               "|SA.1:SA.2:1|SB.1:SB.2:1\n")
    expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
    passed_tag.add_tag_values(actual_vcf_record)
    self.assertEquals(expected_vcf_record, actual_vcf_record)
    # NOTE(review): record equality appears to compare only the record key,
    # so the rendered text is compared too in order to check the tag values.
    self.assertEquals(expected_vcf_record.text(), actual_vcf_record.text())

0 comments on commit fb8b6ca

Please sign in to comment.