Skip to content

Commit

Permalink
EX-189 (cgates): Actually enabled sorting in merge
Browse files Browse the repository at this point in the history
  • Loading branch information
cgates committed Mar 2, 2015
1 parent 5d08d1a commit 06a686c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 31 deletions.
18 changes: 5 additions & 13 deletions jacquard/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,32 +110,19 @@ def _create_reader_lists(input_files):
def _build_coordinates(vcf_readers):
coordinate_set = set()
mult_alts = defaultdict(set)
error = 0

for vcf_reader in vcf_readers:
previous_record = None
try:
vcf_reader.open()

for vcf_record in vcf_reader.vcf_records():
if previous_record and vcf_record < previous_record:
logger.error("VCF file:chrom:pos [{}:{}:{}] is out of order"
.format(vcf_reader.file_name,
vcf_record.chrom,
vcf_record.pos))
error = 1
previous_record = vcf_record
coordinate_set.add(vcf_record.get_empty_record())
ref_alt = vcf_record.ref, vcf_record.alt
locus = vcf_record.chrom, vcf_record.pos
mult_alts[locus].add(ref_alt)
finally:
vcf_reader.close()

if error:
raise utils.JQException("One or more VCF files were not sorted. "
"Review inputs and try again.")

for vcf_record in coordinate_set:
ref_alts_for_this_locus = mult_alts[vcf_record.chrom,
vcf_record.pos]
Expand Down Expand Up @@ -185,6 +172,10 @@ def _get_unsorted_readers(vcf_readers):
reader.open()
for vcf_record in reader.vcf_records():
if previous_record and vcf_record < previous_record:
logger.debug("VCF file:chrom:pos [{}:{}:{}] is out of order"
.format(reader.file_name,
vcf_record.chrom,
vcf_record.pos))
unsorted_readers.append(reader)
break
else:
Expand Down Expand Up @@ -379,6 +370,7 @@ def execute(args, execution_context):

buffered_readers, vcf_readers = _create_reader_lists(input_files)

vcf_readers = _sort_readers(vcf_readers, output_path)
all_sample_names, merge_metaheaders = _build_sample_list(vcf_readers)
coordinates = _build_coordinates(vcf_readers)
format_tags_to_keep = _build_format_tags(format_tag_regex, vcf_readers)
Expand Down
23 changes: 5 additions & 18 deletions test/merge_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,24 +93,6 @@ def test_build_coordinates(self):
expected = [fileArec1, fileArec2, fileBrec2]
self.assertEquals(expected, actual_coordinates)

def test_build_coordinates_unsorted(self):
    """Check that _build_coordinates rejects out-of-order input.

    Two mock readers are supplied; the second lists chr2:12 after
    chr2:16. The call is expected to raise a JQException and the first
    logged ERROR message is expected to name the offending
    file:chrom:pos.
    """
    # Records for fileA.vcf.
    sorted_records = [vcf.VcfRecord("chr1", "1", "A", "C"),
                      vcf.VcfRecord("chr2", "5", "A", "G", "id=1")]
    # Records for fileB.vcf: position 12 follows position 16.
    unsorted_records = [vcf.VcfRecord("chr2", "16", "A", "G", "id=2"),
                        vcf.VcfRecord("chr2", "12", "G", "C")]

    reader_a = MockVcfReader(input_filepath="fileA.vcf",
                             records=sorted_records)
    reader_b = MockVcfReader(input_filepath="fileB.vcf",
                             records=unsorted_records)

    self.assertRaisesRegexp(utils.JQException,
                            "One or more VCF files were not sorted.*",
                            merge._build_coordinates,
                            [reader_a, reader_b])

    first_error = test.mock_logger.messages["ERROR"][0]
    self.assertRegexpMatches(first_error,
                             r"VCF file:chrom:pos \[fileB.vcf:chr2:12\] is out of order")


def test_build_coordinates_multAltsEmpty(self):
fileArec1 = vcf.VcfRecord("chr1", "1", "A", "C")
fileArec2 = vcf.VcfRecord("chr2", "12", "A", "G", "id=1")
Expand Down Expand Up @@ -758,6 +740,11 @@ def test_sort_readers_vcfsResortedAsNecessary(self):
actual_log_infos = test.mock_logger.messages["INFO"]
self.assertEquals(1, len(actual_log_infos))
self.assertRegexpMatches(actual_log_infos[0], r"Sorting vcf \[unsorted.vcf\]")
actual_log_debugs = test.mock_logger.messages["DEBUG"]
self.assertEquals(1, len(actual_log_debugs))
self.assertRegexpMatches(actual_log_debugs[0],
r"VCF file:chrom:pos \[unsorted.vcf:chr1:42\] is out of order")


class MergeFunctionalTestCase(test_case.JacquardBaseTestCase):
def test_merge(self):
Expand Down

0 comments on commit 06a686c

Please sign in to comment.