Skip to content

Commit 0a4cb47

Browse files
ignore blacklisted
1 parent a6272ea commit 0a4cb47

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

scripts/preprocess-remote.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,17 @@ def saveSubset(filename, idx):
180180
return len(testing), len(training)
181181

182182
def main(args):
183+
# blacklisted datasets
184+
blacklisted = []
185+
if args.blacklist is not None:
186+
with open(args.blacklist, 'r') as f:
187+
blacklisted = json.load(f)
188+
pass
189+
blacklisted = set([
190+
'/'.join(item)
191+
for item in blacklisted
192+
])
193+
print(blacklisted)
183194
stats = {
184195
'placeId': [],
185196
'userId': [],
@@ -204,6 +215,13 @@ def main(args):
204215
if not (sid in stats['screenId']):
205216
stats['screenId'].append(sid)
206217
path = os.path.join(folder, placeId, userId, screenId)
218+
# check if the dataset is blacklisted
219+
# placeId twice since real screenId is "placeId/screenId"
220+
uuid = '/'.join([userId, placeId, placeId, screenId])
221+
print(uuid)
222+
if uuid in blacklisted:
223+
print('Skipping blacklisted dataset:', path)
224+
continue
207225
testFramesN, trainFramesN = processFolder(
208226
path,
209227
args.time_delta, args.test_ratio, args.frames_per_chunk,
@@ -246,6 +264,10 @@ def main(args):
246264
'--min-frames', type=int, default=0,
247265
help='Minimum number of frames in a chunk'
248266
)
267+
parser.add_argument(
268+
'--blacklist', type=str, default=None,
269+
help='Path to the blacklist file'
270+
)
249271

250272
args = parser.parse_args()
251273
main(args)

0 commit comments

Comments
 (0)