# Split Test Videos

This notebook splits each raw test-set video into sub-videos of at most 300 frames and writes a correspondingly split label file for each sub-video.

Users should be able to download the batched data here.

In [1]:
import sys
import xmltodict
from collections import OrderedDict
import imageio.v3 as iio
from pathlib import Path
from copy import deepcopy

from turbx import REPO_PATH, log
from turbx.data import find_files

# Upper bound on the number of frames written to a single sub-video.
MAX_LEN = 300
# Frame rate passed to the video writer for every sub-video.
FPS = 10.0

[DEBUG] 12/08/2022 01:16:57PM: MainProcess: __init__.py - Loaded logging config file: /home/nowa201/Projects/triton-crdp/turbx/src/turbx/logging.yaml


In [2]:
# Input locations: label XML files and the raw test videos.
label_path = f"{REPO_PATH}/data/labels/cvat-video-1.1/test"
video_path = f"{REPO_PATH}/data/mp4/test"

# Output locations: "batched_test" siblings of the two input directories.
label_out_path = Path(label_path).with_name("batched_test")
video_out_path = Path(video_path).with_name("batched_test")
for out_dir in (label_out_path, video_out_path):
    out_dir.mkdir(exist_ok=True)

# Collect every video and every ".xml" label file under the input paths.
video_files = find_files(video_path)
label_files = find_files(label_path, file_type=".xml")

# Log the first few hits of each search as a quick sanity check.
for found_files in (video_files, label_files):
    log.debug(found_files[:3])

[DEBUG] 12/08/2022 01:16:57PM: MainProcess: 332178680.py - [PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/mp4/test/2010-09-08_154500_HF_fore.mp4'), PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/mp4/test/2010-09-08_150001_HF_fore.mp4'), PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/mp4/test/2010-09-09_053000_HF_aft.mp4')]
[DEBUG] 12/08/2022 01:16:57PM: MainProcess: 332178680.py - [PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/labels/cvat-video-1.1/test/51.xml'), PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/labels/cvat-video-1.1/test/43.xml'), PosixPath('/home/nowa201/Projects/triton-crdp/turbx/data/labels/cvat-video-1.1/test/39.xml')]


In [3]:
# loop through label files, match with video, batch into subfiles

def split_tracks(track_list, start_frame, end_frame):
    """Return the tracks restricted to the boxes inside a frame range.

    Args:
        track_list: A single track mapping or a list of track mappings. Each
            track must provide the keys '@id', '@label', '@source' and 'box'.
            'box' is either one box mapping or a list of box mappings, and
            every box carries its frame index under '@frame'.
        start_frame: First frame index to keep (inclusive).
        end_frame: Last frame index to keep (inclusive).

    Returns:
        A list of OrderedDict tracks holding only '@id', '@label', '@source'
        and the in-range boxes (original '@frame' values are kept as-is).
        Tracks without any box inside the range are omitted.

    Raises:
        KeyError: If a track or box lacks one of the keys listed above.
    """
    # xmltodict yields a bare mapping instead of a list when an element
    # occurs only once, so a lone track is wrapped here...
    if not isinstance(track_list, list):
        track_list = [track_list]

    otrack_list = []
    for track in track_list:
        # ...and so is a lone box; iterating the bare mapping would walk its
        # keys and fail on box['@frame'].
        boxes = track['box']
        if not isinstance(boxes, list):
            boxes = [boxes]

        kept_boxes = [
            box for box in boxes
            if start_frame <= int(box['@frame']) <= end_frame
        ]
        if kept_boxes:
            otrack_list.append(OrderedDict({
                '@id': track['@id'],
                '@label': track['@label'],
                '@source': track['@source'],
                'box': kept_boxes,
            }))

    return otrack_list


# Loop through every label file: find its video, cut the video into chunks of
# at most MAX_LEN frames and write one video file plus one label file per chunk.
for label_file in label_files:
    with open(str(label_file), 'rb') as f:
        label_xml_dict = xmltodict.parse(f, encoding='utf-8', xml_attribs=True)
    video_info_dict = label_xml_dict['annotations']['meta']['task']
    video_size = int(video_info_dict['size'])
    log.info(f"Splitting video: {video_info_dict['id']}")

    # Pick the first video whose file name contains the task name.
    # A None sentinel is used on purpose: an empty Path() is "." and always
    # exists, so it cannot signal "no match".
    video_file = next(
        (v for v in video_files if video_info_dict["name"] in v.name),
        None,
    )
    # go to next iteration if video didn't exist
    if video_file is None or not video_file.exists():
        log.warning(f"No video found for label file: {label_file}")
        continue

    # The whole video is decoded once and then sliced per sub-video.
    video = iio.imread(str(video_file))
    for sub_video_counter, start_frame in enumerate(range(0, video_size, MAX_LEN)):
        log.info(f"Creating sub-video: {video_info_dict['id']}/{sub_video_counter:04}")

        # Frame bounds are inclusive, e.g. 0..299 for the first chunk; the
        # last chunk is clamped to the final frame of the video.
        end_frame = min(start_frame + MAX_LEN, video_size) - 1
        size = (end_frame - start_frame) + 1
        sub_video_label = deepcopy(label_xml_dict)

        # E.g: name-0001.mp4
        sub_video_file = video_out_path / f"{video_file.stem}-{sub_video_counter:04}{video_file.suffix}"
        sub_video = video[start_frame:(start_frame + size), ...]
        # The label's declared size and the decoded frame count must agree,
        # otherwise the written labels would not line up with the frames.
        if len(sub_video) != size:
            raise ValueError(
                "Sub video does not match label 'video_size': "
                f"expected {size} frames, got {len(sub_video)} ({video_file.name})."
            )
        # Writing the whole chunk in one call is much faster than per frame.
        iio.imwrite(sub_video_file, sub_video, fps=FPS)

        # Rewrite the task metadata so that it describes the sub-video.
        sub_video_id = f"{video_info_dict['id']}{sub_video_counter:04}"
        task_meta = sub_video_label['annotations']['meta']['task']
        task_meta['id'] = sub_video_id
        task_meta['name'] = f"{sub_video_file.name}"
        task_meta['size'] = size
        task_meta['start_frame'] = start_frame
        task_meta['stop_frame'] = end_frame
        task_meta['segments'] = OrderedDict({'segment': OrderedDict({
            'id': video_info_dict['segments']['segment']['id'],
            'start': start_frame,
            'stop': end_frame,
            'url': video_info_dict['segments']['segment']['url'],
        })})

        # Keep only the track boxes that fall inside this sub-video (track
        # ids are preserved). Label files without any track are passed
        # through unchanged. split_tracks does not modify its input, so the
        # source tracks need no extra copy.
        tracks = label_xml_dict['annotations'].get('track')
        if tracks is not None:
            track_list = split_tracks(tracks, start_frame, end_frame)
            if track_list:
                sub_video_label['annotations']['track'] = track_list
            else:
                del sub_video_label['annotations']['track']

        # xmltodict.unparse declares UTF-8 in the XML header by default, so
        # the file is opened with the same encoding.
        with open(label_out_path / f"{sub_video_id}.xml", 'w', encoding='utf-8') as f:
            xmltodict.unparse(sub_video_label, f)

[INFO] 12/08/2022 01:16:57PM: MainProcess: 162261361.py - Splitting video: 51
[INFO] 12/08/2022 01:17:29PM: MainProcess: 162261361.py - Creating sub-video: 51/0000
[INFO] 12/08/2022 01:17:33PM: MainProcess: 162261361.py - Creating sub-video: 51/0001
[INFO] 12/08/2022 01:17:37PM: MainProcess: 162261361.py - Creating sub-video: 51/0002
[INFO] 12/08/2022 01:17:40PM: MainProcess: 162261361.py - Creating sub-video: 51/0003
[INFO] 12/08/2022 01:17:44PM: MainProcess: 162261361.py - Creating sub-video: 51/0004
[INFO] 12/08/2022 01:17:48PM: MainProcess: 162261361.py - Creating sub-video: 51/0005
[INFO] 12/08/2022 01:17:51PM: MainProcess: 162261361.py - Creating sub-video: 51/0006
[INFO] 12/08/2022 01:17:55PM: MainProcess: 162261361.py - Creating sub-video: 51/0007
[INFO] 12/08/2022 01:17:59PM: MainProcess: 162261361.py - Creating sub-video: 51/0008
[INFO] 12/08/2022 01:18:02PM: MainProcess: 162261361.py - Creating sub-video: 51/0009
[INFO] 12/08/2022 01:18:06PM: MainProcess: 162261361.py - Crea



[INFO] 12/08/2022 01:22:24PM: MainProcess: 162261361.py - Creating sub-video: 39/0001




[INFO] 12/08/2022 01:22:27PM: MainProcess: 162261361.py - Creating sub-video: 39/0002




[INFO] 12/08/2022 01:22:30PM: MainProcess: 162261361.py - Creating sub-video: 39/0003




[INFO] 12/08/2022 01:22:33PM: MainProcess: 162261361.py - Creating sub-video: 39/0004




[INFO] 12/08/2022 01:22:36PM: MainProcess: 162261361.py - Creating sub-video: 39/0005




[INFO] 12/08/2022 01:22:39PM: MainProcess: 162261361.py - Creating sub-video: 39/0006




[INFO] 12/08/2022 01:22:42PM: MainProcess: 162261361.py - Creating sub-video: 39/0007




[INFO] 12/08/2022 01:22:45PM: MainProcess: 162261361.py - Creating sub-video: 39/0008




[INFO] 12/08/2022 01:22:48PM: MainProcess: 162261361.py - Creating sub-video: 39/0009




[INFO] 12/08/2022 01:22:51PM: MainProcess: 162261361.py - Creating sub-video: 39/0010




[INFO] 12/08/2022 01:22:54PM: MainProcess: 162261361.py - Creating sub-video: 39/0011




[INFO] 12/08/2022 01:22:57PM: MainProcess: 162261361.py - Creating sub-video: 39/0012




[INFO] 12/08/2022 01:23:00PM: MainProcess: 162261361.py - Creating sub-video: 39/0013




[INFO] 12/08/2022 01:23:03PM: MainProcess: 162261361.py - Creating sub-video: 39/0014




[INFO] 12/08/2022 01:23:06PM: MainProcess: 162261361.py - Creating sub-video: 39/0015




[INFO] 12/08/2022 01:23:09PM: MainProcess: 162261361.py - Creating sub-video: 39/0016




[INFO] 12/08/2022 01:23:12PM: MainProcess: 162261361.py - Creating sub-video: 39/0017




[INFO] 12/08/2022 01:23:15PM: MainProcess: 162261361.py - Creating sub-video: 39/0018




[INFO] 12/08/2022 01:23:18PM: MainProcess: 162261361.py - Creating sub-video: 39/0019




[INFO] 12/08/2022 01:23:21PM: MainProcess: 162261361.py - Creating sub-video: 39/0020




[INFO] 12/08/2022 01:23:24PM: MainProcess: 162261361.py - Creating sub-video: 39/0021




[INFO] 12/08/2022 01:23:27PM: MainProcess: 162261361.py - Creating sub-video: 39/0022




[INFO] 12/08/2022 01:23:30PM: MainProcess: 162261361.py - Creating sub-video: 39/0023




[INFO] 12/08/2022 01:23:32PM: MainProcess: 162261361.py - Splitting video: 55
[INFO] 12/08/2022 01:24:44PM: MainProcess: 162261361.py - Creating sub-video: 55/0000
[INFO] 12/08/2022 01:24:51PM: MainProcess: 162261361.py - Creating sub-video: 55/0001
[INFO] 12/08/2022 01:24:57PM: MainProcess: 162261361.py - Creating sub-video: 55/0002
[INFO] 12/08/2022 01:25:04PM: MainProcess: 162261361.py - Creating sub-video: 55/0003
[INFO] 12/08/2022 01:25:10PM: MainProcess: 162261361.py - Creating sub-video: 55/0004
[INFO] 12/08/2022 01:25:16PM: MainProcess: 162261361.py - Creating sub-video: 55/0005
[INFO] 12/08/2022 01:25:22PM: MainProcess: 162261361.py - Creating sub-video: 55/0006
[INFO] 12/08/2022 01:25:29PM: MainProcess: 162261361.py - Creating sub-video: 55/0007
[INFO] 12/08/2022 01:25:35PM: MainProcess: 162261361.py - Creating sub-video: 55/0008
[INFO] 12/08/2022 01:25:41PM: MainProcess: 162261361.py - Creating sub-video: 55/0009
[INFO] 12/08/2022 01:25:48PM: MainProcess: 162261361.py - Crea



[INFO] 12/08/2022 01:29:31PM: MainProcess: 162261361.py - Creating sub-video: 40/0001




[INFO] 12/08/2022 01:29:34PM: MainProcess: 162261361.py - Creating sub-video: 40/0002




[INFO] 12/08/2022 01:29:37PM: MainProcess: 162261361.py - Creating sub-video: 40/0003




[INFO] 12/08/2022 01:29:41PM: MainProcess: 162261361.py - Creating sub-video: 40/0004




[INFO] 12/08/2022 01:29:44PM: MainProcess: 162261361.py - Creating sub-video: 40/0005




[INFO] 12/08/2022 01:29:47PM: MainProcess: 162261361.py - Creating sub-video: 40/0006




[INFO] 12/08/2022 01:29:50PM: MainProcess: 162261361.py - Creating sub-video: 40/0007




[INFO] 12/08/2022 01:29:53PM: MainProcess: 162261361.py - Creating sub-video: 40/0008




[INFO] 12/08/2022 01:29:57PM: MainProcess: 162261361.py - Creating sub-video: 40/0009




[INFO] 12/08/2022 01:30:00PM: MainProcess: 162261361.py - Creating sub-video: 40/0010




[INFO] 12/08/2022 01:30:03PM: MainProcess: 162261361.py - Creating sub-video: 40/0011




[INFO] 12/08/2022 01:30:07PM: MainProcess: 162261361.py - Creating sub-video: 40/0012




[INFO] 12/08/2022 01:30:10PM: MainProcess: 162261361.py - Creating sub-video: 40/0013




[INFO] 12/08/2022 01:30:13PM: MainProcess: 162261361.py - Creating sub-video: 40/0014




[INFO] 12/08/2022 01:30:16PM: MainProcess: 162261361.py - Creating sub-video: 40/0015




[INFO] 12/08/2022 01:30:20PM: MainProcess: 162261361.py - Creating sub-video: 40/0016




[INFO] 12/08/2022 01:30:23PM: MainProcess: 162261361.py - Creating sub-video: 40/0017




[INFO] 12/08/2022 01:30:26PM: MainProcess: 162261361.py - Creating sub-video: 40/0018




[INFO] 12/08/2022 01:30:30PM: MainProcess: 162261361.py - Creating sub-video: 40/0019




[INFO] 12/08/2022 01:30:33PM: MainProcess: 162261361.py - Creating sub-video: 40/0020




[INFO] 12/08/2022 01:30:36PM: MainProcess: 162261361.py - Creating sub-video: 40/0021




[INFO] 12/08/2022 01:30:40PM: MainProcess: 162261361.py - Creating sub-video: 40/0022




[INFO] 12/08/2022 01:30:42PM: MainProcess: 162261361.py - Splitting video: 49
[INFO] 12/08/2022 01:31:20PM: MainProcess: 162261361.py - Creating sub-video: 49/0000
[INFO] 12/08/2022 01:31:25PM: MainProcess: 162261361.py - Creating sub-video: 49/0001
[INFO] 12/08/2022 01:31:30PM: MainProcess: 162261361.py - Creating sub-video: 49/0002
[INFO] 12/08/2022 01:31:34PM: MainProcess: 162261361.py - Creating sub-video: 49/0003
[INFO] 12/08/2022 01:31:39PM: MainProcess: 162261361.py - Creating sub-video: 49/0004
[INFO] 12/08/2022 01:31:44PM: MainProcess: 162261361.py - Creating sub-video: 49/0005
[INFO] 12/08/2022 01:31:48PM: MainProcess: 162261361.py - Creating sub-video: 49/0006
[INFO] 12/08/2022 01:31:53PM: MainProcess: 162261361.py - Creating sub-video: 49/0007
[INFO] 12/08/2022 01:31:58PM: MainProcess: 162261361.py - Creating sub-video: 49/0008
[INFO] 12/08/2022 01:32:02PM: MainProcess: 162261361.py - Creating sub-video: 49/0009
[INFO] 12/08/2022 01:32:07PM: MainProcess: 162261361.py - Crea



[INFO] 12/08/2022 01:36:55PM: MainProcess: 162261361.py - Creating sub-video: 47/0001




[INFO] 12/08/2022 01:36:59PM: MainProcess: 162261361.py - Creating sub-video: 47/0002




[INFO] 12/08/2022 01:37:02PM: MainProcess: 162261361.py - Creating sub-video: 47/0003




[INFO] 12/08/2022 01:37:05PM: MainProcess: 162261361.py - Creating sub-video: 47/0004




[INFO] 12/08/2022 01:37:08PM: MainProcess: 162261361.py - Creating sub-video: 47/0005




[INFO] 12/08/2022 01:37:12PM: MainProcess: 162261361.py - Creating sub-video: 47/0006




[INFO] 12/08/2022 01:37:15PM: MainProcess: 162261361.py - Creating sub-video: 47/0007




[INFO] 12/08/2022 01:37:19PM: MainProcess: 162261361.py - Creating sub-video: 47/0008




[INFO] 12/08/2022 01:37:22PM: MainProcess: 162261361.py - Creating sub-video: 47/0009




[INFO] 12/08/2022 01:37:25PM: MainProcess: 162261361.py - Creating sub-video: 47/0010




[INFO] 12/08/2022 01:37:29PM: MainProcess: 162261361.py - Creating sub-video: 47/0011




[INFO] 12/08/2022 01:37:32PM: MainProcess: 162261361.py - Creating sub-video: 47/0012




[INFO] 12/08/2022 01:37:36PM: MainProcess: 162261361.py - Creating sub-video: 47/0013




[INFO] 12/08/2022 01:37:39PM: MainProcess: 162261361.py - Creating sub-video: 47/0014




[INFO] 12/08/2022 01:37:43PM: MainProcess: 162261361.py - Creating sub-video: 47/0015




[INFO] 12/08/2022 01:37:46PM: MainProcess: 162261361.py - Creating sub-video: 47/0016




[INFO] 12/08/2022 01:37:49PM: MainProcess: 162261361.py - Creating sub-video: 47/0017




[INFO] 12/08/2022 01:37:53PM: MainProcess: 162261361.py - Creating sub-video: 47/0018




[INFO] 12/08/2022 01:37:56PM: MainProcess: 162261361.py - Creating sub-video: 47/0019




[INFO] 12/08/2022 01:38:00PM: MainProcess: 162261361.py - Creating sub-video: 47/0020




[INFO] 12/08/2022 01:38:04PM: MainProcess: 162261361.py - Creating sub-video: 47/0021




[INFO] 12/08/2022 01:38:07PM: MainProcess: 162261361.py - Creating sub-video: 47/0022




[INFO] 12/08/2022 01:38:11PM: MainProcess: 162261361.py - Creating sub-video: 47/0023




[INFO] 12/08/2022 01:38:14PM: MainProcess: 162261361.py - Creating sub-video: 47/0024




[INFO] 12/08/2022 01:38:15PM: MainProcess: 162261361.py - Splitting video: 50
[INFO] 12/08/2022 01:38:51PM: MainProcess: 162261361.py - Creating sub-video: 50/0000
[INFO] 12/08/2022 01:38:56PM: MainProcess: 162261361.py - Creating sub-video: 50/0001
[INFO] 12/08/2022 01:38:59PM: MainProcess: 162261361.py - Creating sub-video: 50/0002
[INFO] 12/08/2022 01:39:03PM: MainProcess: 162261361.py - Creating sub-video: 50/0003
[INFO] 12/08/2022 01:39:06PM: MainProcess: 162261361.py - Creating sub-video: 50/0004
[INFO] 12/08/2022 01:39:10PM: MainProcess: 162261361.py - Creating sub-video: 50/0005
[INFO] 12/08/2022 01:39:14PM: MainProcess: 162261361.py - Creating sub-video: 50/0006
[INFO] 12/08/2022 01:39:17PM: MainProcess: 162261361.py - Creating sub-video: 50/0007
[INFO] 12/08/2022 01:39:21PM: MainProcess: 162261361.py - Creating sub-video: 50/0008
[INFO] 12/08/2022 01:39:24PM: MainProcess: 162261361.py - Creating sub-video: 50/0009
[INFO] 12/08/2022 01:39:28PM: MainProcess: 162261361.py - Crea