Skip to content

Commit

Permalink
ex-266 (jebene/dkriti/cgates) began work on TagRegistry
Browse files Browse the repository at this point in the history
  • Loading branch information
jebene committed May 28, 2015
1 parent 057dd86 commit ec745fe
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 0 deletions.
22 changes: 22 additions & 0 deletions jacquard/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,28 @@ def _get_next(self):
except StopIteration:
return None

class TagRegistry(object):
    """Assigns stable, unique tag ids to FORMAT metaheaders.

    Each distinct FORMAT metaheader line is given an id of the form
    ``JX<n>_<tag_id>``, where ``<n>`` is the 1-based order in which the
    metaheader was first registered.  Tags whose id starts with ``JQ_`` and
    metaheaders that are not FORMAT lines keep their original id and are
    never recorded.
    """

    def __init__(self):
        # Maps a metaheader line to the tag id that was assigned to it.
        self.metaheaders = {}

    @staticmethod
    def _passthrough(tag_id, metaheader):
        """Return True when *tag_id* must be returned unchanged."""
        if tag_id.startswith('JQ_'):
            return True
        return not metaheader.startswith('##FORMAT=<')

    def register_tag(self, tag_id, metaheader):
        """Return the tag id to use for *metaheader*.

        Registering the same metaheader again returns the id assigned the
        first time, regardless of the *tag_id* passed on later calls.
        """
        if self._passthrough(tag_id, metaheader):
            return tag_id

        assigned = self.metaheaders
        if metaheader in assigned:
            return assigned[metaheader]

        # The numeric prefix is the position of this metaheader among all
        # metaheaders registered so far (1-based).
        ordinal = len(assigned) + 1
        renamed_tag = 'JX{}_{}'.format(ordinal, tag_id)
        assigned[metaheader] = renamed_tag
        return renamed_tag


class NewMergeVcfReader(vcf.VcfReader):
def __init__(self, file_reader, format_tag_mapping):
super(self.__class__,self).__init__(file_reader)
Expand Down
9 changes: 9 additions & 0 deletions jacquard/utils/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ def _get_tag_metaheaders(self, regex_exp):

return tag_dict

@classmethod
def get_id_from_metaheader(cls, metaheader):
    """Extract the value of the ``ID`` field from a VCF metaheader line.

    The line must start with ``##`` and contain an ``ID=`` field introduced
    by ``<`` or ``,``.  The returned value runs up to the next ``,`` or
    ``>``.  Because the leading part of the pattern is greedy, a line that
    repeats the ``ID`` field yields the last occurrence.

    Raises:
        utils.JQException: if no ``ID`` field can be found.
    """
    id_match = re.match("^##.*=.*?[<,]ID=([^,>]*)", metaheader)
    if not id_match:
        msg = "VCF metaheader is missing ID tag [{}]"
        raise utils.JQException(msg, metaheader)
    return id_match.group(1)

@property
def file_name(self):
    """File name reported by the wrapped file reader."""
    wrapped_reader = self._file_reader
    return wrapped_reader.file_name
Expand Down
36 changes: 36 additions & 0 deletions test/merge_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,42 @@ def __init__(self, vcf_records):
def next_if_equals(self, dummy):
    """Return the next queued record; *dummy* is accepted but ignored.

    StopIteration from the underlying iterator propagates to the caller
    once the queued records are exhausted.
    """
    upcoming_record = next(self.vcf_records_iter)
    return upcoming_record

class TagRegistryTestCase(test_case.JacquardBaseTestCase):
    """Unit tests for merge.TagRegistry.register_tag."""

    def test_register_tag(self):
        """Distinct FORMAT metaheaders receive sequentially numbered ids."""
        registry = merge.TagRegistry()
        actual_tag_id = registry.register_tag('DP',
                                              '##FORMAT=<ID=DP,Descrpition="thing1">')
        # assertEqual (not the deprecated assertEquals alias, which was
        # removed in Python 3.12) keeps these tests runnable everywhere.
        self.assertEqual("JX1_DP", actual_tag_id)

        actual_tag_id = registry.register_tag('AF',
                                              '##FORMAT=<ID=AF,Descrpition="thing1">')

        self.assertEqual("JX2_AF", actual_tag_id)

    def test_register_tag_returnsPreviousTagId(self):
        """Re-registering an identical metaheader returns the same id."""
        registry = merge.TagRegistry()
        actual_tag_id = registry.register_tag('DP',
                                              '##FORMAT=<ID=DP,Descrpition="thing1">')
        self.assertEqual("JX1_DP", actual_tag_id)

        actual_tag_id = registry.register_tag('DP',
                                              '##FORMAT=<ID=DP,Descrpition="thing1">')

        self.assertEqual("JX1_DP", actual_tag_id)

    def test_register_tag_passthroughJacquardTags(self):
        """Tag ids prefixed with JQ_ are returned unchanged."""
        registry = merge.TagRegistry()
        actual_tag_id = registry.register_tag('JQ_AF_XX',
                                              '##FORMAT=<ID=DP,Descrpition="thing1">')
        self.assertEqual("JQ_AF_XX", actual_tag_id)

    def test_register_tag_passthroughNonFormatTags(self):
        """Metaheaders that are not FORMAT lines keep their tag id."""
        registry = merge.TagRegistry()
        actual_tag_id = registry.register_tag('FOO',
                                              '##INFO=<ID=FOO,Descrpition="thing1">')
        self.assertEqual("FOO", actual_tag_id)


#TODO: rename
class NewMergeVcfReaderTestCase(test_case.JacquardBaseTestCase):
def setUp(self):
Expand Down
23 changes: 23 additions & 0 deletions test/utils/vcf_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,29 @@ def tearDown(self):
self.output.close()
sys.stderr = self.saved_stderr

def test_get_id_from_metaheader(self):
    """The ID value is found wherever the ID field sits in the metaheader."""
    metaheaders = [
        '##FORMAT=<ID=FOO>',
        '##blah=<ID=FOO>',
        '##blah=<ID=FOO,This=That>',
        '##blah=<This=That,ID=FOO>',
        '##blah=<This=That,ID=FOO,Mine=Yours>',
    ]
    # Cases are checked in order, so the first failing metaheader is the
    # one reported.
    for metaheader in metaheaders:
        parsed_id = VcfReader.get_id_from_metaheader(metaheader)
        self.assertEqual("FOO", parsed_id)

def test_get_id_from_metaheader_malformedMetaheaderTakesLastId(self):
    """When the ID field is repeated, the final occurrence is returned."""
    malformed_metaheader = '##blah=<ID=FOO,ID=BAR,ID=BAZ>'
    parsed_id = VcfReader.get_id_from_metaheader(malformed_metaheader)
    self.assertEqual("BAZ", parsed_id)

def test_get_id_from_metaheader_missingIdRaisesException(self):
    """A metaheader without an ID field raises JQException naming the line."""
    metaheader = '##blah=<This=That,Mine=Yours>'
    expected_message = r"VCF metaheader is missing ID tag \[##blah=<This=That,Mine=Yours>\]"
    with self.assertRaisesRegexp(utils.JQException, expected_message):
        VcfReader.get_id_from_metaheader(metaheader)

def test_init(self):
file_contents = ["##metaheader1\n",
"##metaheader2\n",
Expand Down

0 comments on commit ec745fe

Please sign in to comment.