In [1]:
import fiftyone as fo
import pandas as pd
import os

In [2]:
# Name under which the dataset is registered in FiftyOne
name: str = "AIC_2024"

In [3]:
# Takes about 15 seconds (author's measurement).
# Remove any dataset already registered under this name so it can be rebuilt from scratch.
if fo.dataset_exists(name):
    fo.delete_dataset(name)

# Build the dataset from every image under ../data, descending into subfolders.
dataset = fo.Dataset.from_images_dir(images_dir="../data", recursive=True, name=name)

 100% |███████████| 106589/106589 [8.6s elapsed, 0s remaining, 12.3K samples/s]      


In [4]:
# The App session is launched with auto=False.
# To open it in a browser tab, uncomment session.open_tab() below,
# or go straight to http://localhost:5151/datasets/AIC_2024
session = fo.launch_app(dataset, auto=False)
# session.open_tab()

Session launched. Run `session.show()` to open the App in a cell output.


In [5]:
# Print the first sample to see which fields the freshly loaded dataset carries.
print(dataset.first())

<Sample: {
    'id': '66d1b1db6847aaea9ce95b22',
    'media_type': 'image',
    'filepath': '/Users/VoThinhPhat/Desktop/chatKPT-2024-AIC-HCMC/data/batch1/keyframes/keyframes_L01/L01_V001/001.jpg',
    'tags': [],
    'metadata': None,
}>


In [6]:
# Derive the video name, keyframe id and batch name from each sample's file path.
# Expected layout: .../<batch>/keyframes/<keyframes dir>/<video>/<keyframe_id>.<ext>
# Also collects the set of distinct video names for later use.
unique_videos = set()
# autosave=True writes the edits back in batches instead of one save() call per sample.
for sample in dataset.iter_samples(autosave=True):
    video_dir, frame_file = os.path.split(sample['filepath'])
    keyframes_dir, video = os.path.split(video_dir)
    sample['video'] = video
    # splitext strips the extension whatever its length (.jpg, .jpeg, ...).
    sample['keyframe_id'] = os.path.splitext(frame_file)[0]
    # The batch folder sits two levels above the folder that holds the video folder.
    sample['batch'] = os.path.basename(os.path.dirname(os.path.dirname(keyframes_dir)))
    unique_videos.add(video)

In [7]:
# Map each sample's keyframe_id to the corresponding frame_id.
# For every video, the 'frame_idx' column of its map-keyframes CSV is loaded;
# keyframe_id k is looked up at row k - 1 of that column.
video_frameid_dict = {}
for batch_no in [1, 2, 3]:
    for video in unique_videos:
        csv_path = f"../data/batch{batch_no}/map-keyframes/{video}.csv"
        if os.path.exists(csv_path):
            frame_table = pd.read_csv(csv_path)
            video_frameid_dict[video] = frame_table['frame_idx']

# Fail before touching any sample if a video has no mapping file, instead of
# hitting a bare KeyError halfway through the update loop.
videos_without_map = sorted(unique_videos.difference(video_frameid_dict))
if videos_without_map:
    raise FileNotFoundError(
        f"No map-keyframes CSV found for videos: {videos_without_map}"
    )

# autosave=True writes the edits back in batches instead of one save() call per sample.
for sample in dataset.iter_samples(autosave=True):
    frame_indices = video_frameid_dict[sample['video']]
    row = int(sample['keyframe_id']) - 1
    # Store a plain Python int rather than a numpy scalar.
    sample['frame_id'] = int(frame_indices.iloc[row])

In [8]:
# Print the first sample again to check the added fields (video, keyframe_id, batch, frame_id).
print(dataset.first())

<Sample: {
    'id': '66d1b1db6847aaea9ce95b22',
    'media_type': 'image',
    'filepath': '/Users/VoThinhPhat/Desktop/chatKPT-2024-AIC-HCMC/data/batch1/keyframes/keyframes_L01/L01_V001/001.jpg',
    'tags': [],
    'metadata': None,
    'video': 'L01_V001',
    'keyframe_id': '001',
    'batch': 'batch1',
    'frame_id': 0,
}>


In [15]:
# Write the whole dataset to ../export in FiftyOneDataset format,
# with export_media=False. See reload.ipynb for loading it back.
dataset.export(
    dataset_type=fo.types.FiftyOneDataset,
    export_dir="../export",
    export_media=False,
)

Directory '../export' already exists; export will be merged with existing files
Exporting samples...
 100% |██████████████| 106589/106589 [1.7s elapsed, 0s remaining, 72.6K docs/s]         
