diff --git a/jacquard/merge.py b/jacquard/merge.py
index 5ecdd32..bcb156c 100644
--- a/jacquard/merge.py
+++ b/jacquard/merge.py
@@ -97,6 +97,41 @@ def _get_next(self):
         except StopIteration:
             return None
 
+class TagRegistry(object):
+    """Assigns merge-wide ids to FORMAT tags, keyed by their metaheader.
+
+    Each distinct FORMAT metaheader (a line starting with '##FORMAT=<') is
+    given an id of the form JX{n}_{tag_id}, where n is the 1-based order in
+    which that metaheader was first registered. Jacquard tags (ids starting
+    with 'JQ_') and non-FORMAT metaheaders keep their original id.
+    """
+
+    def __init__(self):
+        # Maps a full FORMAT metaheader line to the id assigned to it.
+        self.metaheaders = {}
+
+    @staticmethod
+    def _passthrough(tag_id, metaheader):
+        """Return True when the tag must keep its original id."""
+        return (tag_id.startswith('JQ_') or
+                not metaheader.startswith('##FORMAT=<'))
+
+    def register_tag(self, tag_id, metaheader):
+        """Return the id to use for tag_id as described by metaheader.
+
+        Registering the same metaheader again returns the id assigned the
+        first time; passthrough tags are returned unchanged and are not
+        recorded in self.metaheaders.
+        """
+        if self._passthrough(tag_id, metaheader):
+            return tag_id
+
+        if metaheader not in self.metaheaders:
+            new_tag = 'JX{}_{}'.format(len(self.metaheaders) + 1, tag_id)
+            self.metaheaders[metaheader] = new_tag
+        return self.metaheaders[metaheader]
+
+
 class NewMergeVcfReader(vcf.VcfReader):
     def __init__(self, file_reader, format_tag_mapping):
         super(self.__class__,self).__init__(file_reader)
diff --git a/jacquard/utils/vcf.py b/jacquard/utils/vcf.py
index 031d582..f5fdca8 100644
--- a/jacquard/utils/vcf.py
+++ b/jacquard/utils/vcf.py
@@ -34,6 +34,19 @@ def _get_tag_metaheaders(self, regex_exp):
 
         return tag_dict
 
+    @classmethod
+    def get_id_from_metaheader(cls, metaheader):
+        """Return the value of the ID field of a structured metaheader.
+
+        When the metaheader holds several ID fields, the last one wins
+        (the leading greedy '.*=' of the pattern settles on it).
+        Raises utils.JQException when no ID field is present.
+        """
+        tag = re.match(r"^##.*=.*?[<,]ID=([^,>]*)", metaheader)
+        if tag:
+            return tag.group(1)
+        msg = "VCF metaheader is missing ID tag [{}]"
+        raise utils.JQException(msg, metaheader)
+
     @property
     def file_name(self):
         return self._file_reader.file_name
diff --git a/test/merge_test.py b/test/merge_test.py
index 0508e59..346f31e 100644
--- a/test/merge_test.py
+++ b/test/merge_test.py
@@ -28,6 +28,44 @@ def __init__(self, vcf_records):
     def next_if_equals(self, dummy):
         return next(self.vcf_records_iter)
 
+class TagRegistryTestCase(test_case.JacquardBaseTestCase):
+    """Covers id assignment and passthrough rules of merge.TagRegistry."""
+
+    def test_register_tag(self):
+        registry = merge.TagRegistry()
+        actual_tag_id = registry.register_tag('DP',
+                                              '##FORMAT=<ID=DP,Number=1>')
+        self.assertEqual("JX1_DP", actual_tag_id)
+
+        actual_tag_id = registry.register_tag('AF',
+                                              '##FORMAT=<ID=AF,Number=1>')
+
+        self.assertEqual("JX2_AF", actual_tag_id)
+
+    def test_register_tag_returnsPreviousTagId(self):
+        registry = merge.TagRegistry()
+        actual_tag_id = registry.register_tag('DP',
+                                              '##FORMAT=<ID=DP,Number=1>')
+        self.assertEqual("JX1_DP", actual_tag_id)
+
+        actual_tag_id = registry.register_tag('DP',
+                                              '##FORMAT=<ID=DP,Number=1>')
+
+        self.assertEqual("JX1_DP", actual_tag_id)
+
+    def test_register_tag_passthroughJacquardTags(self):
+        registry = merge.TagRegistry()
+        actual_tag_id = registry.register_tag('JQ_AF_XX',
+                                              '##FORMAT=<ID=JQ_AF_XX>')
+        self.assertEqual("JQ_AF_XX", actual_tag_id)
+
+    def test_register_tag_passthroughNonFormatTags(self):
+        registry = merge.TagRegistry()
+        actual_tag_id = registry.register_tag('FOO',
+                                              '##INFO=')
+        self.assertEqual("FOO", actual_tag_id)
+
+
 #TODO: rename
 class NewMergeVcfReaderTestCase(test_case.JacquardBaseTestCase):
     def setUp(self):
diff --git a/test/utils/vcf_test.py b/test/utils/vcf_test.py
index 24ccacf..d2d5196 100644
--- a/test/utils/vcf_test.py
+++ b/test/utils/vcf_test.py
@@ -622,6 +622,30 @@ def tearDown(self):
         self.output.close()
         sys.stderr = self.saved_stderr
 
+    def test_get_id_from_metaheader(self):
+        # The ID field may come first, last, or between other fields.
+        actual_id = VcfReader.get_id_from_metaheader('##FORMAT=<ID=FOO>')
+        self.assertEqual("FOO", actual_id)
+        actual_id = VcfReader.get_id_from_metaheader('##blah=<ID=FOO>')
+        self.assertEqual("FOO", actual_id)
+        actual_id = VcfReader.get_id_from_metaheader('##blah=<ID=FOO,Number=1>')
+        self.assertEqual("FOO", actual_id)
+        actual_id = VcfReader.get_id_from_metaheader('##blah=<Number=1,ID=FOO>')
+        self.assertEqual("FOO", actual_id)
+        actual_id = VcfReader.get_id_from_metaheader('##blah=<A=1,ID=FOO,B=2>')
+        self.assertEqual("FOO", actual_id)
+
+    def test_get_id_from_metaheader_malformedMetaheaderTakesLastId(self):
+        actual_id = VcfReader.get_id_from_metaheader('##blah=<ID=FOO,ID=BAZ>')
+        self.assertEqual("BAZ", actual_id)
+
+    def test_get_id_from_metaheader_missingIdRaisesException(self):
+        metaheader = '##blah='
+        self.assertRaisesRegexp(utils.JQException,
+                                r"VCF metaheader is missing ID tag \[##blah=\]",
+                                VcfReader.get_id_from_metaheader,
+                                metaheader)
+
     def test_init(self):
         file_contents = ["##metaheader1\n",
                          "##metaheader2\n",