1
+ '''
2
+ Created on Feb 8, 2011
3
+
4
+ @author: uemit.seren
5
+ '''
6
+
7
+ import os
8
+ import simplejson
9
+ import re
10
+
11
class ChromosomeData(object):
    '''
    Gene lookup for a single chromosome of a JBrowse track.

    The genes come from the nested list stored under ``featureNCList`` in the
    parsed track data.  Every entry starts with ``[start, end, ...]``.  An
    entry whose ``lazyIndex`` column holds a dict is a placeholder for a list
    that lives in a separate JSON chunk file; the ``sublistIndex`` column,
    when present and not None, holds the entries nested inside that entry.
    '''

    def __init__(self, data_folder, trackData):
        '''
        @param data_folder: folder the URL templates of trackData are
            resolved against
        @param trackData: parsed track description; the keys
            'featureNCList', 'sublistIndex', 'lazyIndex',
            'lazyfeatureUrlTemplate' and 'subfeatureArray' (itself a dict
            with 'urlTemplate', 'length' and 'chunkSize') are read
        '''
        # Sub-feature chunks that were already read from disk, by chunk number.
        self.__lazyArrayChunks = {}
        self.__data_folder = data_folder
        self.__featureNCList = trackData['featureNCList']
        self.__sublistIndex = trackData['sublistIndex']
        self.__lazyIndex = trackData['lazyIndex']
        self.__subFeatureArray = trackData['subfeatureArray']
        # Both templates are turned into '%s' format strings; the chunk
        # identifier is filled in when a chunk file is opened.
        self.__lazyFeatureFile = self.__data_folder + '/' + trackData['lazyfeatureUrlTemplate'].replace('{chunk}', '%s')
        self.__subFeatureFile = self.__data_folder + '/' + self.__subFeatureArray['urlTemplate'].replace('{chunk}', '%s')

    def getGenes(self, start, end, getFeatures=True):
        '''
        Return the genes overlapping the half-open interval [start, end).

        @param getFeatures: when False the sub-feature list of every
            returned gene is left empty
        @return: list of ``[col0, col1, col2, col3, subfeatures]`` lists
        '''
        return self._getGenesFromNCList(self.__featureNCList, start, end, getFeatures, [])

    def _getGenesFromNCList(self, nclist, start, end, getFeatures=True, genes=None):
        '''
        Append every gene of nclist (and of its lazy chunks and sublists)
        that ends after start and begins before end to genes.

        @param genes: list to append to; a new list is created when omitted
        @return: the list the genes were appended to
        '''
        # A fresh list per call: a shared default list would carry the
        # results of one query over into the next one.
        if genes is None:
            genes = []
        length = len(nclist)
        i = self._binary_search(nclist, start)
        while 0 <= i < length and nclist[i][0] < end:
            entry = nclist[i]
            lazy = entry[self.__lazyIndex]
            if isinstance(lazy, dict):
                # Placeholder entry: the real entries live in a chunk file.
                lazyFeatures = self._loadJson(self.__lazyFeatureFile % lazy['chunk'])
                self._getGenesFromNCList(lazyFeatures, start, end, getFeatures, genes)
            else:
                genes.append(self._getGeneFeaturesForGene(entry, getFeatures))
            # Entries may be too short to have a sublist column at all.
            if len(entry) > self.__sublistIndex and entry[self.__sublistIndex] is not None:
                self._getGenesFromNCList(entry[self.__sublistIndex], start, end, getFeatures, genes)
            i += 1
        return genes

    def _getGeneFeaturesForGene(self, gene, getFeatures=True):
        '''
        Return the first four columns of gene followed by a list with its
        resolved sub-features.

        Column 4 of gene is read as a list of indices into the sub-feature
        array.  The sub-feature list stays empty when getFeatures is False,
        when column 4 is None or when gene has no column 4.
        '''
        stripped_gene = list(gene[0:4])
        sub_features = []
        stripped_gene.append(sub_features)
        sub_indices = gene[4] if len(gene) > 4 else None
        if sub_indices is not None and getFeatures:
            for sub_index in sub_indices:
                sub_features.append(self._getGeneFeaturesFromPos(sub_index, sub_index))
        return stripped_gene

    def _getGeneFeaturesFromPos(self, start, end):
        '''
        Return the concatenated sub-feature array entries for the inclusive
        index range [start, end], loading the needed chunk files on demand.
        '''
        chunk_size = self.__subFeatureArray['chunkSize']
        start = max(0, start)
        # end is an inclusive index, so the largest usable value is
        # length - 1 (assumes 'length' is the number of array entries).
        end = min(end, self.__subFeatureArray['length'] - 1)
        first_chunk = int(start // chunk_size)
        last_chunk = int(end // chunk_size)

        features = []
        for chunk in range(first_chunk, last_chunk + 1):
            if chunk not in self.__lazyArrayChunks:
                self.__lazyArrayChunks[chunk] = self._loadJson(self.__subFeatureFile % chunk)
            features += self._getGeneFeaturesFromChunk(chunk, start, end)
        return features

    def _getGeneFeaturesFromChunk(self, chunk, start, end):
        '''
        Return the concatenation of the entries of an already loaded chunk
        that fall into the inclusive array index range [start, end].
        '''
        chunk_size = self.__subFeatureArray['chunkSize']
        # Array index of the first entry stored in this chunk.
        first_index = chunk * chunk_size
        chunk_start = max(start - first_index, 0)
        chunk_end = min(end - first_index, chunk_size - 1)
        entries = self.__lazyArrayChunks[chunk]
        features = []
        for i in range(chunk_start, chunk_end + 1):
            features += entries[i]
        return features

    def _loadJson(self, path):
        '''
        Parse the JSON file at path; the file is closed even when parsing
        fails.
        '''
        with open(path) as fp:
            return simplejson.load(fp)

    @classmethod
    def _binary_search(cls, arr, item, low=-1, high=None, index=1):
        '''
        Return the position of the first entry of arr, strictly between low
        and high, whose column index is greater than item; high is returned
        when there is none.

        Assumes arr is ordered ascending on that column.  high defaults to
        len(arr).
        '''
        if high is None:
            high = len(arr)
        while high - low > 1:
            mid = (low + high) >> 1
            if arr[mid][index] > item:
                high = mid
            else:
                # Equal values must move the lower bound as well, otherwise
                # neither bound changes and the loop never terminates.
                low = mid
        return high
94
+
95
class DataSource(object):
    '''
    Gene lookup across all chromosomes of one JBrowse track.

    One ChromosomeData object is created for every entry below
    ``<jbrowse_tracks_folder>/data/tracks/`` that contains a
    ``<track_key>/trackData.json`` file.
    '''

    def __init__(self, jbrowse_tracks_folder, track_key):
        '''
        @param jbrowse_tracks_folder: folder that contains ``data/tracks/``
        @param track_key: name of the track sub-folder inside every
            chromosome folder
        '''
        self.__jbrowse_tracks_folder = jbrowse_tracks_folder
        self.__track_key = track_key
        # ChromosomeData objects keyed by the name of the chromosome folder.
        self.__chromosomeSources = {}
        for fname in os.listdir(self.__jbrowse_tracks_folder + "/data/tracks/"):
            self.__initChromocomeSources(fname)

    def getGenes(self, chromosome, start, end, getFeatures=True):
        '''
        Return the genes of chromosome for the range start..end, as
        delivered by ChromosomeData.getGenes.

        @raise Exception: when no data was loaded for chromosome
        '''
        if chromosome not in self.__chromosomeSources:
            raise Exception('Chromosome Data-Source %s not found' % chromosome)
        return self.__chromosomeSources[chromosome].getGenes(start, end, getFeatures)

    def __initChromocomeSources(self, fname):
        '''
        Load the trackData.json of the chromosome folder fname and register
        a ChromosomeData object for it under that name.
        '''
        track_file = self._getChromosomeTrackFolder(fname) + 'trackData.json'
        # The tracks folder may hold stray files or chromosomes that lack
        # this track; those are skipped instead of aborting the constructor.
        if not os.path.isfile(track_file):
            return
        with open(track_file) as fp:
            trackData = simplejson.load(fp)
        self.__chromosomeSources[fname] = ChromosomeData(self.__jbrowse_tracks_folder, trackData)

    def _getChromosomeTrackFolder(self, chromosome):
        '''
        Return the folder (with trailing slash) holding the track files of
        chromosome.
        '''
        return self.__jbrowse_tracks_folder + '/data/tracks/%s/%s/' % (chromosome, self.__track_key)
127
+
128
+
129
+
130
+
0 commit comments