In [None]:
%%capture
# Install EasyOCR into the notebook environment; the %%capture cell magic above
# hides pip's console output for this cell.
!pip install easyocr

In [None]:
import os
import cv2
import glob
import json
import torch
import easyocr
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Build a two-level index of keyframe images:
#   all_keyframe_paths[data_part][frame_id] -> sorted list of .jpg paths
keyframes_dir = '/kaggle/input/keyframes-v3-aic2024'
all_keyframe_paths = {}
for part in sorted(os.listdir(keyframes_dir)):
    # A three-token directory name is keyed by its last two tokens joined with
    # "_"; any other name is keyed by its final token only.
    name_tokens = part.split('_')
    if len(name_tokens) == 3:
        data_part = f'{name_tokens[-2]}_{name_tokens[-1]}'
    else:
        data_part = name_tokens[-1]

    all_keyframe_paths[data_part] = {}
    data_part_path = f'{keyframes_dir}/Keyframes_{data_part}/keyframes'
    for frame_dir in sorted(os.listdir(data_part_path)):
        # The text after the last underscore of the sub-directory name is the id.
        frame_id = frame_dir.split('_')[-1]
        all_keyframe_paths[data_part][frame_id] = sorted(
            glob.glob(f'{data_part_path}/{frame_dir}/*.jpg')
        )

In [None]:
# Display the data-part keys that were indexed from keyframes_dir.
all_keyframe_paths.keys()

In [None]:
# Split the index into two halves by key order; when the number of keys is odd
# the second half receives the extra key.
keys = list(all_keyframe_paths)
midpoint = len(keys) // 2
first_half_keys, second_half_keys = keys[:midpoint], keys[midpoint:]
sub_dict_1 = {name: all_keyframe_paths[name] for name in first_half_keys}
sub_dict_2 = {name: all_keyframe_paths[name] for name in second_half_keys}

In [None]:
# Display the data-part keys that landed in the second half of the split.
sub_dict_2.keys()

In [None]:
# Restrict the rest of the notebook to the second half of the data parts.
# NOTE: this rebinds all_keyframe_paths, so re-running the split cell after this
# one would split the already-halved dict again; rebuild the index first.
all_keyframe_paths = sub_dict_2

In [None]:
# Create the EasyOCR reader for the 'vi' language list with GPU requested.
# Constructing the Reader loads the model into memory, so this cell only needs
# to run once per kernel session.
reader = easyocr.Reader(['vi'], gpu=True)

In [None]:
# Run batched OCR over every keyframe and write one JSON file per video.
#
# Output layout: {save_dir}/{key}/{video_id}.json. Each file holds a list with
# one entry per keyframe (in sorted path order); every entry is the list of
# paragraph strings kept for that frame.
bs = 16                 # number of keyframes passed to the reader per call
min_confidence = 0.5    # detections scoring at or below this value are dropped
save_dir = '/kaggle/working/ocr'

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(save_dir, exist_ok=True)

keys = sorted(all_keyframe_paths.keys())
for key in tqdm(keys):
    video_keyframe_paths = all_keyframe_paths[key]
    video_ids = sorted(video_keyframe_paths.keys())

    os.makedirs(os.path.join(save_dir, key), exist_ok=True)

    for video_id in tqdm(video_ids):
        video_keyframe_path = video_keyframe_paths[video_id]
        video_ocr_results = []
        # Kept parallel to video_ocr_results: entry i is the image path whose
        # recognised text is video_ocr_results[i]. It is not written to disk.
        video_ocr_results_path = []
        for i in range(0, len(video_keyframe_path), bs):
            # The final slice may hold fewer than bs paths, so the batch size is
            # taken from the slice itself.
            image_paths = video_keyframe_path[i:i+bs]

            results = reader.readtext_batched(image_paths, batch_size=len(image_paths))
            # Assumes readtext_batched returns one result list per input image,
            # in input order, so each result can be paired with its own path.
            for image_path, result in zip(image_paths, results):
                # item[2] is compared against the threshold; per the EasyOCR
                # docs each detection is (bounding box, text, confidence).
                refined_result = [item for item in result if item[2] > min_confidence]
                # Merge the surviving detections into paragraphs; index 1 of
                # each merged item is read as its text.
                refined_result = easyocr.utils.get_paragraph(refined_result)
                text_detected = [item[1] for item in refined_result]
                video_ocr_results.append(text_detected)
                # Record this frame's own path, not the whole batch list.
                video_ocr_results_path.append(image_path)

        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        with open(f'{save_dir}/{key}/{video_id}.json',"w", encoding='utf-8') as jsonfile:
            json.dump(video_ocr_results, jsonfile, ensure_ascii=False)