Skip to content

Commit a3ed9c0

Browse files
Add a flag to ignore datasets that are too small
1 parent bcb125f commit a3ed9c0

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

scripts/preprocess-remote.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,10 @@ def dropPadding(idx, padding):
124124
print('Frames before: {}. Frames after: {}'.format(len(idx), len(res)))
125125
return res
126126

127-
def processFolder(folder, timeDelta, testRatio, framesPerChunk, testPadding, skippedFrames):
127+
def processFolder(
128+
folder, timeDelta, testRatio, framesPerChunk, testPadding, skippedFrames,
129+
minFrames
130+
):
128131
print('Processing', folder)
129132
dataset = loadNpz(folder)
130133
for k, v in dataset.items():
@@ -164,6 +167,11 @@ def saveSubset(filename, idx):
164167
for fn in files:
165168
os.remove(os.path.join(folder, fn))
166169
print('Removed', len(files), 'files')
170+
171+
totalFrames = len(testing) + len(training)
172+
if minFrames < totalFrames:
173+
print('Not enough frames: %d < %d' % (totalFrames, minFrames))
174+
return 0, 0
167175
# save training and testing sets
168176
saveSubset('train.npz', training)
169177
saveSubset('test.npz', testing)
@@ -199,7 +207,8 @@ def main(args):
199207
testFramesN, trainFramesN = processFolder(
200208
path,
201209
args.time_delta, args.test_ratio, args.frames_per_chunk,
202-
args.test_padding, args.skipped_frames
210+
args.test_padding, args.skipped_frames,
211+
minFrames=args.min_frames
203212
)
204213
testFrames += testFramesN
205214
trainFrames += trainFramesN
@@ -233,6 +242,11 @@ def main(args):
233242
'--skipped-frames', type=str, default='train', choices=['train', 'test', 'drop'],
234243
help='What to do with skipped frames ("train", "test", or "drop")'
235244
)
245+
parser.add_argument(
246+
'--min-frames', type=int, default=0,
247+
help='Minimum number of frames in a chunk'
248+
)
249+
236250
args = parser.parse_args()
237251
main(args)
238252
pass

0 commit comments

Comments
 (0)