Skip to content

Commit 1513548

Browse files
author
Seren
committed
package for Jbrowse Backend added
1 parent 3c06cbe commit 1513548

File tree

1 file changed

+130
-0
lines changed

1 file changed

+130
-0
lines changed

backend/JBrowseDataSource.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
'''
2+
Created on Feb 8, 2011
3+
4+
@author: uemit.seren
5+
'''
6+
7+
import os
8+
import simplejson
9+
import re
10+
11+
class ChromosomeData(object):
    '''
    Query helper for the feature data of a single track.

    The data is handed in as an already parsed ``trackData`` dict holding a
    nested list of features (``featureNCList``) and the description of a
    chunked sub-feature array (``subfeatureArray``). Parts that are not
    contained in ``trackData`` itself are read on demand from JSON files
    located below ``data_folder``.
    '''

    # Placeholder inside the URL templates that is replaced by a chunk number.
    _CHUNK_PLACEHOLDER = '{chunk}'

    def __init__(self, data_folder, trackData):
        '''
        data_folder -- folder the chunk URL templates are relative to
        trackData   -- dict with the keys 'featureNCList', 'sublistIndex',
                       'lazyIndex', 'subfeatureArray' (itself a dict with
                       'urlTemplate', 'length' and 'chunkSize') and
                       'lazyfeatureUrlTemplate'
        '''
        # Cache of sub-feature chunks already read from disk (chunk -> list).
        # It is never evicted, so it grows with the number of chunks touched.
        self.__lazyArrayChunks = {}
        self.__data_folder = data_folder
        self.__featureNCList = trackData['featureNCList']
        self.__sublistIndex = trackData['sublistIndex']
        self.__lazyIndex = trackData['lazyIndex']
        self.__subFeatureArray = trackData['subfeatureArray']
        # The templates are stored verbatim; the '{chunk}' placeholder is
        # substituted literally when a file is needed, so other characters in
        # the template (e.g. '%') can never be misread as format directives.
        self.__lazyFeatureFile = self.__data_folder + '/' + trackData['lazyfeatureUrlTemplate']
        self.__subFeatureFile = self.__data_folder + '/' + self.__subFeatureArray['urlTemplate']

    def getGenes(self, start, end, getFeatures=True):
        '''
        Return the list of genes found for the range start..end.

        Every gene is a list of the first four fields of the stored feature
        followed by a list of its sub-features (empty if getFeatures is
        False).
        '''
        return self._getGenesFromNCList(self.__featureNCList, start, end, getFeatures, [])

    def _getGenesFromNCList(self, nclist, start, end, getFeatures=True, genes=None):
        '''
        Collect the genes of ``nclist`` (and of its nested/lazy sub-lists)
        into ``genes`` and return that list.

        Iteration starts at the first entry whose field 1 is greater than
        ``start`` and stops at the first entry whose field 0 is not smaller
        than ``end``. A fresh result list is created when ``genes`` is
        omitted.
        '''
        if genes is None:
            # A literal [] default would be shared between unrelated calls.
            genes = []
        length = len(nclist)
        i = self._binary_search(nclist, start)
        while 0 <= i < length and nclist[i][0] < end:
            entry = nclist[i]
            if len(entry) > self.__lazyIndex and isinstance(entry[self.__lazyIndex], dict):
                # Placeholder entry: the real features live in a chunk file.
                lazyFeatures = self._loadJson(
                    self._chunkPath(self.__lazyFeatureFile, entry[self.__lazyIndex]['chunk']))
                genes = self._getGenesFromNCList(lazyFeatures, start, end, getFeatures, genes)
            else:
                genes.append(self._getGeneFeaturesForGene(entry, getFeatures))
            # Descend into the features nested below this entry, if any.
            if len(entry) > self.__sublistIndex and entry[self.__sublistIndex] is not None:
                genes = self._getGenesFromNCList(entry[self.__sublistIndex], start, end, getFeatures, genes)
            i += 1
        return genes

    def _getGeneFeaturesForGene(self, gene, getFeatures=True):
        '''
        Return ``gene`` reduced to its first four fields plus a fifth field
        holding the resolved sub-features.

        Field 4 of ``gene`` is read as a list of indexes into the
        sub-feature array. The fifth field of the result is an empty list
        when ``getFeatures`` is False or the gene has no such field.
        '''
        stripped_gene = list(gene[0:4])
        subFeatures = []
        if getFeatures and len(gene) > 4 and gene[4] is not None:
            for subFeature in gene[4]:
                subFeatures.append(self._getGeneFeaturesFromPos(subFeature, subFeature))
        stripped_gene.append(subFeatures)
        return stripped_gene

    def _getGeneFeaturesFromPos(self, start, end):
        '''
        Return the concatenated sub-feature array entries for the index
        range start..end (both inclusive), reading missing chunks from disk.

        The range is clipped to the valid indexes; 'length' of the
        sub-feature array is treated as its total number of entries, so the
        last valid index is length - 1. A range that lies completely outside
        yields an empty list.
        '''
        features = []
        chunkSize = self.__subFeatureArray['chunkSize']
        start = max(0, start)
        end = min(end, self.__subFeatureArray['length'] - 1)
        if start > end:
            return features
        firstChunk = int(start // chunkSize)
        lastChunk = int(end // chunkSize)
        for chunk in range(firstChunk, lastChunk + 1):
            if chunk not in self.__lazyArrayChunks:
                self.__lazyArrayChunks[chunk] = self._loadJson(
                    self._chunkPath(self.__subFeatureFile, chunk))
            features.extend(self._getGeneFeaturesFromChunk(chunk, start, end))
        return features

    def _getGeneFeaturesFromChunk(self, chunk, start, end):
        '''
        Return the part of the index range start..end that falls into the
        (already cached) chunk number ``chunk``.

        The selected entries are concatenated into one flat list, i.e. every
        entry is expected to be a list itself.
        '''
        features = []
        chunkSize = self.__subFeatureArray['chunkSize']
        firstIndex = chunk * chunkSize
        # Translate the global indexes into positions inside this chunk.
        chunkStart = max(start - firstIndex, 0)
        chunkEnd = min(end - firstIndex, chunkSize - 1)
        for i in range(chunkStart, chunkEnd + 1):
            features.extend(self.__lazyArrayChunks[chunk][i])
        return features

    @classmethod
    def _chunkPath(cls, template, chunk):
        '''Return ``template`` with the chunk placeholder replaced by ``chunk``.'''
        return template.replace(cls._CHUNK_PLACEHOLDER, str(chunk))

    @staticmethod
    def _loadJson(path):
        '''Parse the JSON file ``path``; the file is closed even on errors.'''
        with open(path) as fp:
            return simplejson.load(fp)

    @classmethod
    def _binary_search(cls, arr, item, low=-1, high=None, index=1):
        '''
        Return the position of the first entry of ``arr`` whose field
        ``index`` is greater than ``item`` (``len(arr)`` if there is none).

        ``arr`` must be sorted ascending by that field. ``low`` and ``high``
        are exclusive bounds of the searched window.
        '''
        if high is None:
            high = len(arr)
        while high - low > 1:
            mid = (low + high) >> 1
            if arr[mid][index] > item:
                high = mid
            else:
                # Also taken when the value equals ``item``; without moving
                # a bound in that case the loop would never terminate.
                low = mid
        return high
94+
95+
class DataSource(object):
    '''
    Gene lookup for one track across all chromosomes of a JBrowse folder.

    On construction every sub-folder of ``<jbrowse_tracks_folder>/data/tracks/``
    is treated as a chromosome and the ``trackData.json`` of the requested
    track is loaded for it.
    '''

    def __init__(self, jbrowse_tracks_folder, track_key):
        '''
        jbrowse_tracks_folder -- folder that contains ``data/tracks/``
        track_key             -- name of the track folder below each
                                 chromosome folder

        Raises the error of ``os.listdir``/``open`` if the tracks folder or
        the ``trackData.json`` of a chromosome folder cannot be read.
        '''
        self.__jbrowse_tracks_folder = jbrowse_tracks_folder
        self.__track_key = track_key
        self.__chromosomeSources = {}
        tracks_folder = self.__jbrowse_tracks_folder + "/data/tracks/"
        for fname in os.listdir(tracks_folder):
            # Plain files next to the chromosome folders cannot contain a
            # track; trying to load them would only abort the constructor.
            if not os.path.isdir(os.path.join(tracks_folder, fname)):
                continue
            self.__initChromocomeSources(fname)

    def getGenes(self, chromosome, start, end, getFeatures=True):
        '''
        Return the genes of ``chromosome`` for the range start..end as
        delivered by the chromosome's data source.

        Raises Exception if no data source was loaded for ``chromosome``.
        '''
        if chromosome not in self.__chromosomeSources:
            raise Exception('Chromosome Data-Source %s not found' % chromosome)
        return self.__chromosomeSources[chromosome].getGenes(start, end, getFeatures)

    def __initChromocomeSources(self, fname):
        '''Load ``trackData.json`` of chromosome folder ``fname`` and register it.'''
        # 'with' guarantees the file is closed even if parsing fails.
        with open(self._getChromosomeTrackFolder(fname) + 'trackData.json') as fp:
            trackData = simplejson.load(fp)
        self.__chromosomeSources[fname] = ChromosomeData(self.__jbrowse_tracks_folder, trackData)

    def _getChromosomeTrackFolder(self, chromosome):
        '''Return the track folder of ``chromosome`` (with trailing slash).'''
        return self.__jbrowse_tracks_folder + '/data/tracks/%s/%s/' % (chromosome, self.__track_key)
127+
128+
129+
130+

0 commit comments

Comments
 (0)