@@ -180,6 +180,17 @@ def saveSubset(filename, idx):
180
180
return len (testing ), len (training )
181
181
182
182
def main (args ):
183
+ # blacklisted datasets
184
+ blacklisted = []
185
+ if args .blacklist is not None :
186
+ with open (args .blacklist , 'r' ) as f :
187
+ blacklisted = json .load (f )
188
+ pass
189
+ blacklisted = set ([
190
+ '/' .join (item )
191
+ for item in blacklisted
192
+ ])
193
+ print (blacklisted )
183
194
stats = {
184
195
'placeId' : [],
185
196
'userId' : [],
@@ -204,6 +215,13 @@ def main(args):
204
215
if not (sid in stats ['screenId' ]):
205
216
stats ['screenId' ].append (sid )
206
217
path = os .path .join (folder , placeId , userId , screenId )
218
+ # check if the dataset is blacklisted
219
+ # placeId twice since real screenId is "placeId/screenId"
220
+ uuid = '/' .join ([userId , placeId , placeId , screenId ])
221
+ print (uuid )
222
+ if uuid in blacklisted :
223
+ print ('Skipping blacklisted dataset:' , path )
224
+ continue
207
225
testFramesN , trainFramesN = processFolder (
208
226
path ,
209
227
args .time_delta , args .test_ratio , args .frames_per_chunk ,
@@ -246,6 +264,10 @@ def main(args):
246
264
'--min-frames' , type = int , default = 0 ,
247
265
help = 'Minimum number of frames in a chunk'
248
266
)
267
+ parser .add_argument (
268
+ '--blacklist' , type = str , default = None ,
269
+ help = 'Path to the blacklist file'
270
+ )
249
271
250
272
args = parser .parse_args ()
251
273
main (args )
0 commit comments