Skip to content

Commit

Permalink
ex-266 (jebene) modified _validate_consistent_samples to take vcf_readers instead of file_readers
Browse files Browse the repository at this point in the history
  • Loading branch information
dkriti committed May 27, 2015
1 parent 1793338 commit b57bcb0
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 32 deletions.
22 changes: 10 additions & 12 deletions jacquard/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,25 +658,23 @@ def _build_writers_to_readers(vcf_readers, output_path):

return writers_to_readers

def _get_readers_per_patient(file_readers):
def _get_readers_per_patient(vcf_readers):
readers_per_patient = defaultdict(list)
for file_reader in file_readers:
file_reader.open()
patient = file_reader.file_name.split(".")[0]
for line in file_reader.read_lines():
caller_meta_header = "##jacquard.translate.caller"
if line.startswith(caller_meta_header):
readers_per_patient[patient].append(line.split("=")[1])
file_reader.close()
for vcf_reader in vcf_readers:
patient = vcf_reader.file_name.split(".")[0]
for metaheader in vcf_reader.metaheaders:
if metaheader.startswith("##jacquard.translate.caller"):
readers_per_patient[patient].append(metaheader.split("=")[1])

return OrderedDict(sorted(readers_per_patient.items()))

def _validate_consistent_samples(file_readers):
readers_per_patient = _get_readers_per_patient(file_readers)
def _validate_consistent_samples(vcf_readers):
readers_per_patient = _get_readers_per_patient(vcf_readers)

all_callers = set()
for callers in readers_per_patient.values():
all_callers.update(callers)

warning = 0
for patient, callers in readers_per_patient.items():
missing_callers = set(all_callers).difference(set(callers))
Expand Down Expand Up @@ -812,7 +810,6 @@ def execute(args, execution_context):

input_files = sorted(glob.glob(os.path.join(input_path, "*.vcf")))
file_readers = [vcf.FileReader(i) for i in input_files]
_validate_consistent_samples(file_readers)

try:
file_writer = vcf.FileWriter(output_path)
Expand All @@ -824,6 +821,7 @@ def execute(args, execution_context):
#reduce excess iterations over the coordinates/vcf_readers
merge_vcf_readers = _create_vcf_readers(file_readers, format_tag_regex)
_validate_consistent_input(merge_vcf_readers, args.include_all)
_validate_consistent_samples(merge_vcf_readers)
merge_vcf_readers = _sort_readers(merge_vcf_readers,
output_path,
format_tag_regex)
Expand Down
40 changes: 20 additions & 20 deletions test/merge_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,19 +579,19 @@ def test_validate_consistent_arguments(self):
self.assertFalse("[{}] not found in {}".format(key, row_help))

def test_validate_consistent_samples_missingCaller(self):
input_files = [MockFileReader("A.mutect.vcf",
["##jacquard.translate.caller=MuTect"]),
MockFileReader("A.strelka.vcf",
["##jacquard.translate.caller=Strelka"]),
MockFileReader("A.varscan.vcf",
["##jacquard.translate.caller=VarScan"]),
MockFileReader("B.strelka.vcf",
["##jacquard.translate.caller=Strelka"]),
MockFileReader("B.varscan.vcf",
["##jacquard.translate.caller=VarScan"]),
MockFileReader("C.varscan.vcf",
["##jacquard.translate.caller=VarScan"])]
merge._validate_consistent_samples(input_files)
vcf_readers = [MockVcfReader(input_filepath="A.mutect.vcf",
metaheaders=["##jacquard.translate.caller=MuTect"]),
MockVcfReader(input_filepath="A.strelka.vcf",
metaheaders=["##jacquard.translate.caller=Strelka"]),
MockVcfReader(input_filepath="A.varscan.vcf",
metaheaders=["##jacquard.translate.caller=VarScan"]),
MockVcfReader(input_filepath="B.strelka.vcf",
metaheaders=["##jacquard.translate.caller=Strelka"]),
MockVcfReader(input_filepath="B.varscan.vcf",
metaheaders=["##jacquard.translate.caller=VarScan"]),
MockVcfReader(input_filepath="C.varscan.vcf",
metaheaders=["##jacquard.translate.caller=VarScan"])]
merge._validate_consistent_samples(vcf_readers)

actual_log_warnings = test.utils.mock_logger.messages["WARNING"]
expected_log_warnings = ["Sample [B] is missing VCF(s): ['MuTect']",
Expand All @@ -600,13 +600,13 @@ def test_validate_consistent_samples_missingCaller(self):
self.assertEquals(expected_log_warnings, actual_log_warnings)

def test_validate_consistent_samples_allMissingCallers(self):
input_files = [MockFileReader("A.mutect.vcf",
["##jacquard.translate.caller=MuTect"]),
MockFileReader("B.strelka.vcf",
["##jacquard.translate.caller=Strelka"]),
MockFileReader("C.mutect.vcf",
["##jacquard.translate.caller=MuTect"])]
merge._validate_consistent_samples(input_files)
vcf_readers = [MockVcfReader(input_filepath="A.mutect.vcf",
metaheaders=["##jacquard.translate.caller=MuTect"]),
MockVcfReader(input_filepath="B.strelka.vcf",
metaheaders=["##jacquard.translate.caller=Strelka"]),
MockVcfReader(input_filepath="C.mutect.vcf",
metaheaders=["##jacquard.translate.caller=MuTect"])]
merge._validate_consistent_samples(vcf_readers)

actual_log_warnings = test.utils.mock_logger.messages["WARNING"]
expected_log_warnings = ["Sample [A] is missing VCF(s): ['Strelka']",
Expand Down

0 comments on commit b57bcb0

Please sign in to comment.