# mk-video-class-for-sentence.py
# Written by Ye Kyaw Thu, Visiting Professor, LST, NECTEC, Thailand
# Create a new folder and copy augmented video files into per-class sub-folders (the class is the basename of the file)
# To move the files instead of copying them, change shutil.copy to shutil.move in the code
# Last updated: 20 June 2023
# How to run:
# If you want to save the class folder name with the original basename:
# $ time python ./mk-video-class-for-sentence.py videos class
#
# If you want to save the class folder name with 1, 2, 3 instead of basename:
# $ time python ./mk-video-class-for-sentence.py videos class --index
import os
import shutil
import sys
import argparse
import re
def get_classes_from_dir(base_dir):
    """Collect the distinct class names of the files directly inside *base_dir*.

    A file's class name is its name without the extension and without a
    trailing parenthesized number, so ``hello.mp4`` and ``hello (2).mp4``
    both yield the class ``hello``. Sub-directories of *base_dir* are
    ignored and are not descended into.

    Args:
        base_dir: Directory whose immediate entries are scanned.

    Returns:
        A sorted list of unique class names. Sorting makes the result
        reproducible from run to run, which matters to callers that number
        the classes by their position in this list.

    Raises:
        OSError: If *base_dir* cannot be listed (propagated from os.listdir).
    """
    # Remove trailing parenthesized numbers from class names.
    # DOTALL lets the pattern match names containing a newline, for which
    # re.match would otherwise return None.
    class_pattern = re.compile(r'^(.*?)(?:\s*\(\d+\))?$', re.DOTALL)

    class_names = set()
    for file_name in os.listdir(base_dir):
        if os.path.isdir(os.path.join(base_dir, file_name)):
            continue
        stem = os.path.splitext(file_name)[0]
        class_names.add(class_pattern.match(stem).group(1))
    return sorted(class_names)
def write_index_file(class_index, output_dir):
    """Write the class-name-to-index mapping to ``index.txt`` in *output_dir*.

    The file is opened for writing as UTF-8 (replacing any existing
    ``index.txt``) and receives one line per mapping entry in the form
    ``<class name> : <index>``.

    Args:
        class_index: Mapping of class name to its index.
        output_dir: Directory in which ``index.txt`` is written.
    """
    index_path = os.path.join(output_dir, 'index.txt')
    with open(index_path, 'w', encoding='utf8') as handle:
        handle.writelines(
            f'{name} : {number}\n' for name, number in class_index.items()
        )
def _class_name_of(file_name):
    """Return the class of *file_name*: its stem minus a trailing "(N)".

    Uses the same pattern get_classes_from_dir applies when it collects the
    class names, so a file maps to exactly one class.
    """
    stem = os.path.splitext(file_name)[0]
    return re.match(r'^(.*?)(?:\s*\(\d+\))?$', stem, re.DOTALL).group(1)


def main(base_dir, output_folder, use_index):
    """Copy the files under *base_dir* into one sub-folder per class.

    The class folders are created inside ``output_folder``, which is placed
    next to *base_dir* (joined onto the parent directory of *base_dir*).
    Class names come from get_classes_from_dir(base_dir); files anywhere
    below *base_dir* are copied into the folder of the class they belong to.
    Files whose class is not among those names are left alone.

    Args:
        base_dir: Directory containing the files to sort; ``~`` is expanded.
        output_folder: Name of the folder that receives the class folders.
        use_index: If true, name the class folders 1, 2, 3, ... and write the
            name-to-number mapping with write_index_file; otherwise name
            them after the classes themselves.
    """
    # normpath drops a trailing separator; without it dirname("videos/") is
    # "videos" and the output folder would be created inside the input tree.
    base_dir = os.path.normpath(os.path.expanduser(base_dir))
    output_dir = os.path.join(os.path.dirname(base_dir), output_folder)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Get class names; sorting keeps the --index numbering stable across runs
    classes = sorted(get_classes_from_dir(base_dir))

    # Create a class index if the --index option is set
    if use_index:
        class_index = {class_name: index for index, class_name in enumerate(classes, 1)}
    else:
        class_index = None

    # Write the index file if the --index option is set
    if class_index:
        write_index_file(class_index, output_dir)

    # Create every class directory once, remembering where each class goes
    class_dirs = {}
    for class_name in classes:
        # Use the class index for the class directory name if the --index option is set
        class_dir_name = str(class_index[class_name]) if class_index else class_name
        class_dir = os.path.join(output_dir, class_dir_name)
        os.makedirs(class_dir, exist_ok=True)
        class_dirs[class_name] = class_dir

    # Directories this function writes into; they must not be walked as input,
    # or files would be copied onto themselves.
    skipped = {os.path.abspath(output_dir)}
    skipped.update(os.path.abspath(path) for path in class_dirs.values())

    for root, dirs, files in os.walk(base_dir):
        # Pruning dirs in place stops os.walk from descending into them
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) not in skipped
        ]
        # Copy files
        for filename in files:
            # Exact class match: "caterpillar.mp4" must not land in class "cat"
            class_dir = class_dirs.get(_class_name_of(filename))
            if class_dir is not None:
                shutil.copy(os.path.join(root, filename), os.path.join(class_dir, filename))
if __name__ == "__main__":
    # Command-line entry point: two positional paths plus the optional
    # -i/--index switch, handed straight to main().
    cli = argparse.ArgumentParser(description='Sort files into class directories.')
    cli.add_argument(
        'base_dir',
        help='The directory containing the files to sort.',
    )
    cli.add_argument(
        'output_folder',
        help='The directory to output the class directories to.',
    )
    cli.add_argument(
        '-i', '--index',
        action='store_true',
        help='Use indexed class names instead of the original names.',
    )
    options = cli.parse_args()
    main(
        base_dir=options.base_dir,
        output_folder=options.output_folder,
        use_index=options.index,
    )