@@ -130,6 +130,10 @@ def dropPadding(idx, padding):
130
130
131
131
def processFolder (folder , timeDelta , testRatio , framesPerChunk , testPadding , skippedFrames , minimumFrames , dropZeroDeltas ):
132
132
print ('Processing' , folder )
133
+ stats = {
134
+ 'deltas' : [],
135
+ 'durations' : [],
136
+ }
133
137
# load all.npz file if it exists
134
138
all_file = os .path .join (folder , 'all.npz' )
135
139
if os .path .exists (all_file ):
@@ -160,7 +164,7 @@ def processFolder(folder, timeDelta, testRatio, framesPerChunk, testPadding, ski
160
164
161
165
if N < minimumFrames :
162
166
print ('Dataset is too short. Skipping...' )
163
- return 0 , 0 , True
167
+ return 0 , 0 , True , None
164
168
# split dataset into sessions
165
169
sessions = Utils .extractSessions (dataset , float (timeDelta ))
166
170
# print sessions and their durations for debugging
@@ -174,6 +178,8 @@ def processFolder(folder, timeDelta, testRatio, framesPerChunk, testPadding, ski
174
178
print ('Session {} - {}: min={}, max={}, mean={}, frames={}, duration={} sec' .format (
175
179
start , end , np .min (delta ), np .max (delta ), np .mean (delta ), len (session_time ), duration
176
180
))
181
+ stats ['deltas' ].append (delta )
182
+ stats ['durations' ].append (duration )
177
183
continue
178
184
######################################################
179
185
# split each session into training and testing sets
@@ -205,7 +211,7 @@ def saveSubset(filename, idx):
205
211
print (', ' .join (['%s: %s' % (k , v .shape ) for k , v in dataset .items ()]))
206
212
207
213
print ('Processing ' , folder , 'done' )
208
- return len (testing ), len (training ), False
214
+ return len (testing ), len (training ), False , stats
209
215
210
216
def main (args ):
211
217
stats = {
@@ -219,6 +225,10 @@ def main(args):
219
225
folder = args .folder
220
226
foldersList = lambda x : [nm for nm in os .listdir (x ) if os .path .isdir (os .path .join (x , nm ))]
221
227
subfolders = foldersList (folder )
228
+ globalStats = {
229
+ 'deltas' : [],
230
+ 'durations' : [],
231
+ }
222
232
for placeId in subfolders :
223
233
if not (placeId in stats ['placeId' ]):
224
234
stats ['placeId' ].append (placeId )
@@ -232,7 +242,7 @@ def main(args):
232
242
if not (sid in stats ['screenId' ]):
233
243
stats ['screenId' ].append (sid )
234
244
path = os .path .join (folder , placeId , userId , screenId )
235
- testFramesN , trainFramesN , isSkipped = processFolder (
245
+ testFramesN , trainFramesN , isSkipped , new_stats = processFolder (
236
246
path ,
237
247
args .time_delta , args .test_ratio , args .frames_per_chunk ,
238
248
args .test_padding , args .skipped_frames ,
@@ -245,6 +255,8 @@ def main(args):
245
255
# store the number of frames per chunk
246
256
sid = '%s/%s/%s' % (placeId , userId , screenId )
247
257
framesPerChunk [sid ] = testFramesN + trainFramesN
258
+ for k , v in new_stats .items ():
259
+ globalStats [k ].extend (v )
248
260
continue
249
261
print ('Total: %d training frames, %d testing frames' % (trainFrames , testFrames ))
250
262
@@ -255,6 +267,22 @@ def main(args):
255
267
print ('-' * 80 )
256
268
for k , v in framesPerChunk .items ():
257
269
print ('%s: %d frames' % (k , v ))
270
+ ###########################################
271
def plot_histogram(data, title, filename):
    """Save a 100-bin histogram of ``data`` as an image file.

    Args:
        data: Values handed directly to matplotlib's ``hist``.
        title: Text used as the plot title.
        filename: Destination path handed to ``savefig``.

    Returns:
        None.
    """
    # Imported inside the function, exactly as the original did.
    import matplotlib.pyplot as plt

    # Work on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", so that exactly this figure can be closed afterwards.
    fig, ax = plt.subplots()
    try:
        ax.hist(data, bins=100)
        ax.set_title(title)
        ax.grid()
        fig.savefig(filename)
    finally:
        # The original called plt.clf() after plt.close(); with no figure open,
        # clf() creates a fresh empty figure, so a stray one was always left
        # behind. Closing the explicit figure releases it even if plotting or
        # saving raised.
        plt.close(fig)
280
+
281
+ for k , v in globalStats .items ():
282
+ if 0 == len (v ): continue
283
+ v = np .concatenate (v )
284
+ plot_histogram (v , 'Histogram of %s' % k , os .path .join (folder , '%s.png' % k ))
285
+ continue
258
286
return
259
287
260
288
if __name__ == '__main__' :
0 commit comments