In [1]:
from PIL.Image import Resampling
from PIL import Image
import os

def convert_and_resize_jpg_to_webp(input_path, output_path, max_width=640):
    """Re-encode an image as WebP, downscaling it when wider than ``max_width``.

    An image whose width is at most ``max_width`` is saved at its original
    size. A wider image is resized to exactly ``max_width`` pixels wide with
    Lanczos resampling; its height is scaled by the same factor, truncated to
    an integer and never allowed to drop below 1 pixel.

    Args:
        input_path: Path of the source image, opened with ``Image.open``.
        output_path: Destination path; the file is always written as WebP.
        max_width: Largest width, in pixels, the saved image may have.
    """
    with Image.open(input_path) as img:
        if img.width > max_width:
            scale = max_width / img.width
            # int() truncates, so a very wide but short image could otherwise
            # end up with a height of 0, which is not a usable image size.
            new_height = max(1, int(scale * img.height))
            img = img.resize((max_width, new_height), Resampling.LANCZOS)
        # The format is passed explicitly, so it does not depend on the
        # extension of output_path.
        img.save(output_path, 'webp')


In [2]:
def get_webp_file_path(keyframe_path):
    """Map a keyframe path to its WebP output path and create the output folder.

    Every occurrence of ``'Keyframes'`` in the path is replaced with
    ``'Keyframes_webp'``, and a final ``.jpg`` extension (in any letter case)
    is replaced with ``.webp``. Paths with any other extension keep it.

    Args:
        keyframe_path: Path of the source keyframe image.

    Returns:
        The output path as a string. Its parent directory, if the path has
        one, exists when this function returns.
    """
    webp_path = keyframe_path.replace('Keyframes', 'Keyframes_webp')

    # Only touch the real file extension, and ignore its case, so that
    # "001.JPG" also becomes "001.webp" and a ".jpg" inside a folder name
    # is left alone.
    stem, ext = os.path.splitext(webp_path)
    if ext.lower() == '.jpg':
        webp_path = stem + '.webp'

    # A bare filename has no directory part; os.makedirs('') would raise.
    out_dir = os.path.dirname(webp_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    return webp_path

In [3]:
import os

def find_all_keyframe_files(root_dir, extensions=(".jpg",)):
    """Recursively collect files under ``root_dir`` with one of the given extensions.

    Matching is case-insensitive on both sides: ``".jpg"`` matches
    ``frame.JPG`` and ``".JPG"`` matches ``frame.jpg``.

    Args:
        root_dir: Directory to walk with ``os.walk``.
        extensions: Filename suffixes to accept, as an iterable of strings
            or as a single string. Defaults to ``(".jpg",)``.

    Returns:
        A list of paths (``dirpath`` joined with the filename), in the order
        ``os.walk`` produced them. The list is empty when nothing matches.
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # Filenames are lowercased before comparison, so the suffixes must be too.
    suffixes = tuple(ext.lower() for ext in extensions)

    files = []
    for dirpath, _, filenames in os.walk(root_dir):
        files.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.lower().endswith(suffixes)
        )
    return files

In [4]:
# Substrings identifying the video batches handled in this run.
keywords = ["L25_extra", "L26_extra", "L27_extra", "L28_extra", "L29_extra", "L30_extra"]

# Scan the keyframe root and keep only paths that contain one of the keywords.
keyframes = list(
    filter(
        lambda path: any(keyword in path for keyword in keywords),
        find_all_keyframe_files('/media/daoan/T7 Shield2/AI_Challenge_2024_DATA/Keyframes'),
    )
)
len(keyframes)

305552

In [5]:
def process_keyframe(keyframe):
    """Convert one keyframe image to WebP at its derived output path.

    The output path comes from ``get_webp_file_path``; the conversion itself
    is done by ``convert_and_resize_jpg_to_webp`` with its default settings.

    Args:
        keyframe: Path of the source keyframe image.
    """
    convert_and_resize_jpg_to_webp(keyframe, get_webp_file_path(keyframe))


In [6]:
import os
from PIL.Image import Resampling
from PIL import Image
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

# Convert all selected keyframes in parallel, one worker process per CPU core.
# Each task is a single small image, so tasks are handed to the workers in
# batches (chunksize) instead of one at a time to reduce inter-process
# overhead. imap still yields one result per keyframe, in input order, so the
# progress bar keeps counting individual images.
with Pool(cpu_count()) as pool:
    list(tqdm(pool.imap(process_keyframe, keyframes, chunksize=64), total=len(keyframes)))

100%|██████████| 305552/305552 [29:42<00:00, 171.39it/s]
