-
Notifications
You must be signed in to change notification settings - Fork 0
/
ontarget_phast.py
47 lines (36 loc) · 1.82 KB
/
ontarget_phast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
########################################
#command: python ontarget_phast.py .bed .ontarget.phast -g hg19...
########################################
import sys,os
file_prefix=sys.argv[1].split('.')[0]
genome=sys.argv[4]
exon_for_ontarget='/data8/xiexw/gRNA_tool/genomes/'+genome+'.on.exon' #modifiable
phastCE='/data8/xiexw/gRNA_tool/genomes/'+genome+'.phast' #modifiable
intersectBed='intersectBed' #modifiable
groupBy='groupBy' #modifiable
#get on-target site of gRNA
os.system(intersectBed+' -a '+file_prefix+'.bed -b '+exon_for_ontarget+' -wa -wb -loj > '+file_prefix+'.ontarget')
#whether gRNA overlap with phast conserved element
infile=open(file_prefix+".ontarget","r"); outfile=open(file_prefix+".ontarget.bed","w")
i=1
while i!=0:
line=infile.readline()
if len(line)>0:
items=line.strip('\n').split('\t')
outfile.write('\t'.join(items[0:4])+'|~|'+items[9]+'\t'+items[4]+'\t'+items[5]+'\n')
else:
i=0
infile.close(); outfile.close()
os.system(intersectBed+' -a '+file_prefix+'.ontarget.bed -b '+phastCE+' -wa -wb -loj > '+file_prefix+'.phast')
os.system(groupBy+' -i '+file_prefix+'.phast -g 1,2,3,4 -c 11 -o sum,count > '+file_prefix+'.phast.sc') ##because one gRNA may overlap with several phastCEs
infile=open(file_prefix+".phast.sc","r"); outfile=open(file_prefix+".ontarget.phast","w")
i=1
while i!=0:
line=infile.readline()
if len(line)>0:
items=line.strip('\n').split('\t')
outfile.write('\t'.join(items[0:4])+'\t'+'('+items[4]+','+items[5]+')'+'\n')
else:
i=0
infile.close(); outfile.close()
os.remove(file_prefix+'.ontarget'); os.remove(file_prefix+'.ontarget.bed'); os.remove(file_prefix+'.phast'); os.remove(file_prefix+'.phast.sc')