From 7ce8522b8a46e9baa68d884ac0ffe7b0079ad1a9 Mon Sep 17 00:00:00 2001 From: Xiaodi Wu Date: Fri, 1 May 2009 14:45:10 -0400 Subject: [PATCH] Added scripts for new genomes and allele frequency queries, and modified *_map.py outputs. --- core/cgi_to_gff.py | 49 ++++++++++ core/gff_hgmd_map.py | 59 ++++++++--- core/gff_morbid_map.py | 31 ++++-- core/gff_omim_map.py | 59 ++++++++--- core/gff_snpedia_map.py | 145 ++++++++++++++++++++++++++++ core/gff_subtract.py | 26 +++++ core/json_allele_frequency_query.py | 87 +++++++++++++++++ core/server.py | 57 +++++++++-- core/snpinduse_to_gff.py | 48 +++++++++ 9 files changed, 522 insertions(+), 39 deletions(-) create mode 100644 core/cgi_to_gff.py create mode 100644 core/gff_snpedia_map.py create mode 100644 core/gff_subtract.py create mode 100644 core/json_allele_frequency_query.py create mode 100644 core/snpinduse_to_gff.py diff --git a/core/cgi_to_gff.py b/core/cgi_to_gff.py new file mode 100644 index 0000000..3fbeb71 --- /dev/null +++ b/core/cgi_to_gff.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# Filename: cgi_to_gff.py + +""" +usage: %prog CGI-Variations-Compact.csv ... +""" + +# Output GFF record for each entry in file(s) +# --- +# This code is part of the Trait-o-matic project and is governed by its license. + +import fileinput, os, sys + +def main(): + # return if we don't have the correct arguments + if len(sys.argv) < 2: + raise SystemExit(__doc__.replace("%prog", sys.argv[0])) + + for line in fileinput.input(): + if line.startswith("#"): + continue + l = line.strip().split(",") + if len(l) < 10 or l[4] not in ["=", "snp"] or l[5] not in ["=", "snp"]: + continue + + chr = l[1] + # for now, we treat PAR1 and PAR2 normally, with chrX coordinates + if chr in ["PAR1", "PAR2", "chrXnonPAR"]: + chr = "chrX" + elif chr == "chrYnonPAR": + chr = "chrY" + + out_line = chr + out_line += "\tcgi\tSNP\t" + out_line += str(long(l[2]) + 1) + "\t" + l[3] + out_line += "\t.\t+\t.\t" + + if l[7] == l[8]: + out_line += "alleles " + l[7] + else: + out_line += "alleles " + l[7] + "/" + l[8] + + out_line += ";ref_allele " + l[6] + out_line += ";total_score " + l[9] + + print out_line + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/core/gff_hgmd_map.py b/core/gff_hgmd_map.py index f3c54b2..5fb2d47 100644 --- a/core/gff_hgmd_map.py +++ b/core/gff_hgmd_map.py @@ -10,6 +10,7 @@ # This code is part of the Trait-o-matic project and is governed by its license. import os, string, sys +from copy import copy import MySQLdb import simplejson as json from utils import gff @@ -90,6 +91,7 @@ def main(): for record in gff_file: # lightly parse alleles alleles = record.attributes["alleles"].strip("\"").split("/") + ref_allele = record.attributes["ref_allele"].strip("\"") # determine zygosity if len(alleles) == 1: @@ -124,24 +126,57 @@ def main(): amino = d[1] pmid = d[2] acc_num = d[3] + + # keep, for internal purposes, a list of alleles minus the reference + leftover_alleles = copy(alleles) + try: + leftover_alleles.remove(ref_allele) + except ValueError: + pass # we see if what we designated the mutant allele is the phenotype- # associated allele, or if what we the reference sequence allele is # the phenotype-associated allele; if the latter, we have to make # sure that the genome we're looking at actually has the reference # allele - if string.lower(amino).endswith(string.lower(ref_aa)) or \ - (string.lower(amino).endswith(string.lower(mut_aa)) and - record.attributes["ref_allele"] in alleles): - output = { - "gene": gene, - "variant": str(record), - "amino_acid_change": amino, - "zygosity": zygosity, - "phenotype": disease, - "reference": "pmid:" + pmid, - } - print json.dumps(output) + if string.lower(amino).endswith(string.lower(mut_aa)): + amino_acid_change_and_position = aa[0] + str(aa_pos) + aa[-1] + #TODO: this doesn't work when we have multiple alleles + if len(leftover_alleles) == 1: + trait_allele = leftover_alleles[0] + else: + trait_allele = '?' + elif (string.lower(amino).endswith(string.lower(ref_aa)) and + ref_allele in alleles): + amino_acid_change_and_position = aa[-1] + str(aa_pos) + aa[0] + trait_allele = ref_allele + else: + continue + + # format for output + if record.start == record.end: + coordinates = str(record.start) + else: + coordinates = str(record.start) + "-" + str(record.end) + + genotype = "/".join(leftover_alleles) + if ref_allele in alleles: + genotype = ref_allele + "/" + genotype + + output = { + "chromosome": record.seqname, + "coordinates": coordinates, + "gene": gene, + "amino_acid_change": amino_acid_change_and_position, + "genotype": genotype, + "ref_allele": ref_allele, + "trait_allele": trait_allele, + "zygosity": zygosity, + "variant": str(record), + "phenotype": disease, + "reference": "pmid:" + pmid, + } + print json.dumps(output) # close database cursor and connection cursor.close() diff --git a/core/gff_morbid_map.py b/core/gff_morbid_map.py index ea96edf..05e73a1 100644 --- a/core/gff_morbid_map.py +++ b/core/gff_morbid_map.py @@ -42,16 +42,21 @@ def main(): gff_file = gff.input(sys.argv[1]) for record in gff_file: + # lightly parse alleles + alleles = record.attributes["alleles"].strip("\"").split("/") + ref_allele = record.attributes["ref_allele"].strip("\"") + # examine each amino acid change (this takes care of alternative splicings) amino_acid_changes = record.attributes["amino_acid"].strip("\"").split("/") # make sure not to duplicate what we print because of multiple alternative - # splicings; so, initialize an empty list to hold previous output strings - # so we can compare before printing - previous_output_strings = [] + # splicings; so, initialize an empty list to hold previous tuples of gene + # names, variant records, and phenotype strings, so that we can compare + previous_gene_variant_phenotype = [] # examine each alternative splicing for a in amino_acid_changes: + gene_variant_phenotype = [] output_strings = [] amino_acid = a.split(" ") @@ -82,11 +87,25 @@ def main(): continue for d in data: - disorder = d[0] + disorder = d[0].strip() omim = d[1] + gene_variant_phenotype.append((gene, str(record), disorder)) + + # format for output + if record.start == record.end: + coordinates = str(record.start) + else: + coordinates = str(record.start) + "-" + str(record.end) + + genotype = "/".join(alleles) output = { + "chromosome": record.seqname, + "coordinates": coordinates, "gene": gene, + "amino_acid_change": aa, + "genotype": genotype, + "ref_allele": ref_allele, "variant": str(record), "phenotype": disorder, "reference": "omim:" + str(omim), @@ -95,8 +114,8 @@ def main(): output_strings.append(json.dumps(output)) # actually only output what's not duplicating previous - if output_strings != previous_output_strings: - previous_output_strings = output_strings + if gene_variant_phenotype != previous_gene_variant_phenotype: + previous_gene_variant_phenotype = gene_variant_phenotype for o in output_strings: print o diff --git a/core/gff_omim_map.py b/core/gff_omim_map.py index 28a2710..1cda93a 100644 --- a/core/gff_omim_map.py +++ b/core/gff_omim_map.py @@ -10,6 +10,7 @@ # This code is part of the Trait-o-matic project and is governed by its license. import os, string, sys +from copy import copy import MySQLdb import simplejson as json from utils import gff @@ -64,6 +65,7 @@ def main(): for record in gff_file: # lightly parse alleles alleles = record.attributes["alleles"].strip("\"").split("/") + ref_allele = record.attributes["ref_allele"].strip("\"") # determine zygosity if len(alleles) == 1: @@ -102,23 +104,56 @@ def main(): else: allelic_variant_id = "omim:" + d[3] + # keep, for internal purposes, a list of alleles minus the reference + leftover_alleles = copy(alleles) + try: + leftover_alleles.remove(ref_allele) + except ValueError: + pass + # we see if what we designated the mutant allele is the phenotype- # associated allele, or if what we the reference sequence allele is # the phenotype-associated allele; if the latter, we have to make # sure that the genome we're looking at actually has the reference # allele - if string.lower(amino_acid_change).endswith(string.lower(mut_aa)) or \ - (string.lower(amino_acid_change).endswith(string.lower(ref_aa)) and - record.attributes["ref_allele"] in alleles): - output = { - "gene": gene, - "variant": str(record), - "amino_acid_change": amino_acid_change, - "zygosity": zygosity, - "phenotype": phenotype, - "reference": allelic_variant_id, - } - print json.dumps(output) + if string.lower(amino_acid_change).endswith(string.lower(mut_aa)): + amino_acid_change_and_position = aa[0] + str(aa_pos) + aa[-1] + #TODO: this doesn't work when we have multiple alleles + if len(leftover_alleles) == 1: + trait_allele = leftover_alleles[0] + else: + trait_allele = '?' + elif (string.lower(amino_acid_change).endswith(string.lower(ref_aa)) and + ref_allele in alleles): + amino_acid_change_and_position = aa[-1] + str(aa_pos) + aa[0] + trait_allele = ref_allele + else: + continue + + # format for output + if record.start == record.end: + coordinates = str(record.start) + else: + coordinates = str(record.start) + "-" + str(record.end) + + genotype = "/".join(leftover_alleles) + if ref_allele in alleles: + genotype = ref_allele + "/" + genotype + + output = { + "chromosome": record.seqname, + "coordinates": coordinates, + "gene": gene, + "amino_acid_change": amino_acid_change_and_position, + "genotype": genotype, + "ref_allele": ref_allele, + "trait_allele": trait_allele, + "zygosity": zygosity, + "variant": str(record), + "phenotype": phenotype, + "reference": allelic_variant_id, + } + print json.dumps(output) # close database cursor and connection cursor.close() diff --git a/core/gff_snpedia_map.py b/core/gff_snpedia_map.py new file mode 100644 index 0000000..2a35fd8 --- /dev/null +++ b/core/gff_snpedia_map.py @@ -0,0 +1,145 @@ +#!/usr/bin/python +# Filename: gff_snpedia_map.py + +""" +usage: %prog gff_file +""" + +# Output SNPedia information in JSON format, if available +# --- +# This code is part of the Trait-o-matic project and is governed by its license. + +import os, string, sys +import MySQLdb, MySQLdb.cursors +import simplejson as json +from utils import gff +from utils.biopython_utils import reverse_complement +from utils.bitset import * +from config import DB_HOST, DB_READ_USER, DB_READ_PASSWD, DB_READ_DATABASE + +location_query = ''' +SELECT chr, start, end FROM snpedia LIMIT %s,10000; +''' + +query = ''' +SELECT phenotype, pubmed_id FROM snpedia WHERE +rs_id=%s AND ((strand="+" AND genotype=%s) OR (strand="-" AND genotype=%s)); +''' + +def main(): + # return if we don't have the correct arguments + if len(sys.argv) < 2: + raise SystemExit(__doc__.replace("%prog", sys.argv[0])) + + # first, try to connect to the databases + try: + location_connection = MySQLdb.connect(cursorclass=MySQLdb.cursors.SSCursor, host=DB_HOST, user=DB_READ_USER, passwd=DB_READ_PASSWD, db=DB_READ_DATABASE) + location_cursor = location_connection.cursor() + connection = MySQLdb.connect(host=DB_HOST, user=DB_READ_USER, passwd=DB_READ_PASSWD, db=DB_READ_DATABASE) + cursor = connection.cursor() + except MySQLdb.OperationalError, message: + print "Error %d while connecting to database: %s" % (message[0], message[1]) + sys.exit() + + # build a dictionary of bitsets from the database (partly based on utility code from bx-python) + start_record = 0 + last_chromosome = None + last_bitset = None + bitsets = dict() + # do this in 10,000 chunks + while True: + location_cursor.execute(location_query, start_record) + previous_start_record = start_record + # go through what we retrieved + for datum in location_cursor: + start_record += 1 + chromosome = datum[0] + if chromosome != last_chromosome: + if chromosome not in bitsets: + bitsets[chromosome] = BinnedBitSet(MAX) + last_chromosome = chromosome + last_bitset = bitsets[chromosome] + start, end = datum[1], datum[2] + last_bitset.set_range(start, end - start) + # stop if we're done + if previous_start_record == start_record: + break + + # doing this intersect operation speeds up our task significantly for full genomes + gff_file = gff.input(sys.argv[1]) + for line in gff_file.intersect(bitsets): + # the one drawback is that intersect() was implemented to return strings, so we + # need to parse it + record = gff.input([line]).next() + # lightly parse to find the alleles and rs number + alleles = record.attributes["alleles"].strip("\"").split("/") + ref_allele = record.attributes["ref_allele"].strip("\"") + try: + xrefs = record.attributes["db_xref"].strip("\"").split(",") + except KeyError: + try: + xrefs = record.attributes["Dbxref"].strip("\"").split(",") + except KeyError: + continue + for x in xrefs: + if x.startswith("dbsnp:rs"): + rs = x.replace("dbsnp:", "") + break + + # quit if we don't have an rs number + if not rs: + continue + # we wouldn't know what to do with this, so pass it up for now + if len(alleles) > 2: + continue + + # create the genotype string from the given alleles + #TODO: do something about the Y chromosome + if len(alleles) == 1: + genotype = alleles[0] + ";" + alleles[0] + reverse_genotype = reverse_complement(alleles[0]) + ";" + reverse_complement(alleles[0]) + else: + genotype = ';'.join(sorted(alleles)) + reverse_genotype = ';'.join(sorted([reverse_complement(a) for a in alleles])) + + # query the database + cursor.execute(query, (rs, genotype, reverse_genotype)) + data = cursor.fetchall() + + # move on if we don't have info + if cursor.rowcount <= 0: + continue + + for d in data: + phenotype = d[0] + pubmed = d[1] + + # format for output + if record.start == record.end: + coordinates = str(record.start) + else: + coordinates = str(record.start) + "-" + str(record.end) + + if pubmed != "": + reference = "dbsnp:" + rs + ",pmid:" + pubmed.replace(",", ",pmid:") + else: + reference = "dbsnp:" + rs + + output = { + "chromosome": record.seqname, + "coordinates": coordinates, + "genotype": genotype, + "variant": str(record), + "phenotype": phenotype, + "reference": reference + } + print json.dumps(output) + + # close database cursor and connection + cursor.close() + connection.close() + location_cursor.close() + location_connection.close() + +if __name__ == "__main__": + main() diff --git a/core/gff_subtract.py b/core/gff_subtract.py new file mode 100644 index 0000000..39d1251 --- /dev/null +++ b/core/gff_subtract.py @@ -0,0 +1,26 @@ +#!/usr/bin/python +# Filename: gff_subtract.py + +""" +usage: %prog gff_file_1 gff_file_2 +""" + +# Output the subtraction of two GFF files, with attributes taken from the first +# --- +# This code is part of the Trait-o-matic project and is governed by its license. + +import sys +from utils import gff + +def main(): + # return if we don't have the correct arguments + if len(sys.argv) < 3: + raise SystemExit(__doc__.replace("%prog", sys.argv[0])) + + g1 = gff.input(sys.argv[1]) + g2 = gff.input(sys.argv[2]) + for line in g1.subtract(g2): + print line + +if __name__ == "__main__": + main() diff --git a/core/json_allele_frequency_query.py b/core/json_allele_frequency_query.py new file mode 100644 index 0000000..bf3d1f6 --- /dev/null +++ b/core/json_allele_frequency_query.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# Filename: json_allele_frequency_query.py + +""" +usage: %prog json ... [options] + -i, --in-place: move input into a backup file and write output in its place +""" + +# Output allele frequency information in JSON format, if available +# --- +# This code is part of the Trait-o-matic project and is governed by its license. + +import fileinput, os, string, sys +import MySQLdb +import simplejson as json +from utils import doc_optparse +from utils.biopython_utils import reverse_complement +from config import DB_HOST, DB_READ_USER, DB_READ_PASSWD, DB_READ_DATABASE + +query = ''' +SELECT strand, pop, ref_allele, ref_allele_freq, oth_allele, oth_allele_freq FROM hapmap27 WHERE +chr=%s AND start=%s AND end=%s; +''' + +def main(): + # parse options + option, args = doc_optparse.parse(__doc__) + + if len(args) < 1: + doc_optparse.exit() + + # first, try to connect to the databases + try: + connection = MySQLdb.connect(host=DB_HOST, user=DB_READ_USER, passwd=DB_READ_PASSWD, db=DB_READ_DATABASE) + cursor = connection.cursor() + except MySQLdb.OperationalError, message: + print "Error %d while connecting to database: %s" % (message[0], message[1]) + sys.exit() + + for line in fileinput.input(args, inplace=option.in_place): + l = json.loads(line.strip()) + + # sanity check--should always pass + if not ("chromosome" in l or "coordinates" in l): + continue + + # we can't handle variants longer than 1 bp + unavailable = True + if not "-" in "coordinates": + chr = l["chromosome"] + start = int(l["coordinates"]) - 1 + end = int(l["coordinates"]) + + # get allele frequency data based on coordinates + cursor.execute(query, (chr, start, end)) + data = cursor.fetchall() + + # move on if we don't have info + if cursor.rowcount > 0: + unavailable = False + + if unavailable: + l["maf"] = "N/A" + if "trait_allele" in l: + l["taf"] = "N/A" + else: + # output minor allele frequency as a dictionary, with population abbrs as keys + l["maf"] = dict(zip([d[1] for d in data], [float(min(d[3], d[5])) for d in data])) + # output trait allele frequency as a dictionary; this one is a little trickier + if "trait_allele" in l: + l["taf"] = dict() + for d in data: + if d[0] == "+" and l["trait_allele"] == d[2] \ + or d[0] == "-" and l["trait_allele"] == reverse_complement(d[2]): + l["taf"][d[1]] = float(d[3]) + elif d[0] == "+" and l["trait_allele"] == d[4] \ + or d[0] == "-" and l["trait_allele"] == reverse_complement(d[4]): + l["taf"][d[1]] = float(d[5]) + + print json.dumps(l) + + # close database cursor and connection + cursor.close() + connection.close() + +if __name__ == "__main__": + main() diff --git a/core/server.py b/core/server.py index a243a0c..a927e66 100644 --- a/core/server.py +++ b/core/server.py @@ -5,22 +5,47 @@ usage: %prog [options] -h, --host=STRING: the host on which to listen -p, --port=NUMBER: the port on which to listen + -t, --trackback: invoke the server's trackback function with arguments url, path, kind, request_token (does not start a new server) """ # Start an XMLRPC server for Trait-o-matic # --- # This code is part of the Trait-o-matic project and is governed by its license. -import base64, hashlib, os, shutil, subprocess, sys, urllib2 +import base64, hashlib, os, shutil, subprocess, sys, urllib, urllib2 from SimpleXMLRPCServer import SimpleXMLRPCServer as xrs from tempfile import mkstemp from utils import doc_optparse from config import UPLOAD_DIR, REFERENCE_GENOME +def trackback(url, params): + request = urllib2.Request(url, params) + request.add_header('User-agent', 'Trait-o-matic/20090123 Python') + request.add_header('Content-type', 'application/x-www-form-urlencoded;charset=utf-8') + try: + file = urllib2.urlopen(request) + except URLError: + return False + file.close() + return True + def main(): # parse options option, args = doc_optparse.parse(__doc__) + # deal with the trackback option + if option.trackback: + if len(args) < 4: + doc_optparse.exit() + url = args[0] + path = args[1] + kind = args[2] + request_token = args[3] + params = urllib.urlencode({ 'path': path, 'kind': kind, 'request_token': request_token }) + trackback(url, params) + return + + # otherwise, figure out the host and port host = option.host or "localhost" port = int(option.port or 8080) @@ -28,7 +53,7 @@ def main(): server = xrs((host, port)) server.register_introspection_functions() - def submit(genotype_file, coverage_file=None, username=None, password=None): + def submit(genotype_file, coverage_file='', username=None, password=None): # get genotype file r = urllib2.Request(genotype_file) if username is not None: @@ -56,26 +81,33 @@ def submit(genotype_file, coverage_file=None, username=None, password=None): return s server.register_function(submit) - def submit_local(genotype_file, coverage_file=None): + def submit_local(genotype_file, coverage_file='', trackback_url='', request_token=''): # execute script script_dir = os.path.dirname(sys.argv[0]) output_dir = os.path.dirname(genotype_file) # letters refer to scripts; numbers refer to outputs - args = { 'A': os.path.join(script_dir, "gff_query_twobit.py"), - 'B': os.path.join(script_dir, "gff_query_dbsnp.py"), + args = { 'A': os.path.join(script_dir, "gff_twobit_query.py"), + 'B': os.path.join(script_dir, "gff_dbsnp_query.py"), 'C': os.path.join(script_dir, "gff_nonsynonymous_filter.py"), 'D': os.path.join(script_dir, "gff_omim_map.py"), 'E': os.path.join(script_dir, "gff_hgmd_map.py"), 'F': os.path.join(script_dir, "gff_morbid_map.py"), + 'G': os.path.join(script_dir, "gff_snpedia_map.py"), + 'H': os.path.join(script_dir, "json_allele_frequency_query.py"), + 'Z': os.path.join(script_dir, "server.py"), 'in': genotype_file, 'reference': REFERENCE_GENOME, - '1': os.path.join(output_dir, "genotype.1.gff"), - '2': os.path.join(output_dir, "genotype.2.gff"), + 'url': trackback_url, + 'token': request_token, + '1': os.path.join(output_dir, "genotype.gff"), + '2': os.path.join(output_dir, "genotype.dbsnp.gff"), '3': os.path.join(output_dir, "ns.gff"), '4': os.path.join(output_dir, "omim.json"), '5': os.path.join(output_dir, "hgmd.json"), '6': os.path.join(output_dir, "morbid.json"), - '7': os.path.join(output_dir, "README") } + '7': os.path.join(output_dir, "snpedia.json"), + '8': "", + '0': os.path.join(output_dir, "README") } cmd = '''( python '%(A)s' '%(in)s' '%(reference)s' > '%(1)s' python '%(B)s' '%(1)s' > '%(2)s' @@ -83,7 +115,14 @@ def submit_local(genotype_file, coverage_file=None): python '%(D)s' '%(3)s' > '%(4)s' python '%(E)s' '%(3)s' > '%(5)s' python '%(F)s' '%(3)s' > '%(6)s' - touch '%(7)s' + python '%(G)s' '%(2)s' > '%(7)s' + python '%(H)s' '%(4)s' '%(5)s' '%(6)s' '%(7)s' --in-place + touch '%(0)s' + python '%(Z)s' -t '%(url)s' '%(4)s' 'out/omim' '%(token)s' + python '%(Z)s' -t '%(url)s' '%(5)s' 'out/hgmd' '%(token)s' + python '%(Z)s' -t '%(url)s' '%(6)s' 'out/morbid' '%(token)s' + python '%(Z)s' -t '%(url)s' '%(7)s' 'out/snpedia' '%(token)s' + python '%(Z)s' -t '%(url)s' '%(0)s' 'out/readme' '%(token)s' )&''' % args subprocess.call(cmd, shell=True) return output_dir diff --git a/core/snpinduse_to_gff.py b/core/snpinduse_to_gff.py new file mode 100644 index 0000000..dbc9514 --- /dev/null +++ b/core/snpinduse_to_gff.py @@ -0,0 +1,48 @@ +#!/usr/bin/python +# Filename: snpinduse_to_gff.py + +""" +usage: %prog *.snpinduse.txt ... +""" + +# Output GFF record for each entry in file(s) +# --- +# This code is part of the Trait-o-matic project and is governed by its license. + +import fileinput, os, sys + +# based on +def unique(seq): # not order preserving, but that's OK; we can sort it later + return {}.fromkeys(seq).keys() + +def main(): + # return if we don't have the correct arguments + if len(sys.argv) < 2: + raise SystemExit(__doc__.replace("%prog", sys.argv[0])) + + for line in fileinput.input(): + if not line.startswith("SNP:"): + continue + l = line.strip().split("|") + if l[2] != "SS_STRAND_FWD": + print >> sys.stderr, l[1] + continue + + snp = l[1].split(":") + details = snp[0].split("_") + alleles = snp[1].split("/") + + out_line = details[3] + out_line += "\t" + out_line += details[0] + out_line += "\tSNP\t" + out_line += details[4] + "\t" + details[4] + out_line += "\t.\t+\t.\t" + + out_line += "alleles " + '/'.join(unique(alleles)) + out_line += ";id " + details[1] + + print out_line + +if __name__ == "__main__": + main() \ No newline at end of file