In [1]:
import fiftyone as fo
import fiftyone.brain as fob
import numpy as np
from glob import glob
import json
import os

In [2]:
# Name of the FiftyOne dataset; any existing dataset with this name is deleted here
dataset_name = "AICBaseline"

# Delete the dataset if it already exists
if fo.dataset_exists(dataset_name):
    fo.delete_dataset(dataset_name)

In [3]:
# Create the dataset from the images under the keyframes folder.
# recursive=True asks for sub-folders to be searched as well; no tags are set.
dataset = fo.Dataset.from_images_dir(
    images_dir="../data/batch1/keyframes",
    recursive=True,
    tags=None,
    name=dataset_name,
)

 100% |███████████████| 8115/8115 [908.2ms elapsed, 0s remaining, 9.0K samples/s]       


In [4]:
# Launch a FiftyOne App session for the dataset. With auto=False the App is not
# shown automatically; call `session.show()` to open it in a cell output.
session = fo.launch_app(dataset, auto=False)

Session launched. Run `session.show()` to open the App in a cell output.


In [5]:
# Derive each sample's video name and frame id from its file path, which is
# expected to end in .../<video>/<frameid>.<ext>.
# os.path is used instead of splitting on '\\' so the cell also works where the
# path separator is '/', and splitext() copes with extensions that are not
# exactly three characters long (e.g. ".jpeg").
for sample in dataset:
    stem, _ext = os.path.splitext(sample['filepath'])
    video_dir, frame_id = os.path.split(stem)
    sample['video'] = os.path.basename(video_dir)
    sample['frameid'] = frame_id
    sample.save()

In [6]:
# Object detection: attach detections read from per-frame JSON files to samples.
STOP_FRAME_ID = "0236"   # the loop ends at the first sample carrying this frame id
MIN_CONFIDENCE = 0.4     # detections scoring at or below this value are dropped

# NOTE(review): the JSON path is built from the frame id only, not the video
# name, so samples from different videos that share a frame id would read the
# same file -- confirm this matches the layout of ../data/batch1/objects.
for sample in dataset:
    frame_id = sample['frameid']

    if frame_id == STOP_FRAME_ID:
        break

    object_path = f"../data/batch1/objects/0{frame_id}.json"
    try:
        with open(object_path, encoding="utf-8") as jsonfile:
            det_data = json.load(jsonfile)
    except (OSError, ValueError) as err:
        # Missing/unreadable file (OSError) or undecodable/invalid JSON
        # (ValueError): stop importing, as before, but report why instead of
        # swallowing every exception silently.
        print(f"Stopping object import at {object_path}: {err!r}")
        break

    detections = []
    for cls, box, score in zip(
        det_data['detection_class_entities'],
        det_data['detection_boxes'],
        det_data['detection_scores'],
    ):
        # The box is read as [y_min, x_min, y_max, x_max] and converted to
        # [top-left-x, top-left-y, width, height]
        y_min, x_min, y_max, x_max = (float(coord) for coord in box[:4])
        boxf = [x_min, y_min, x_max - x_min, y_max - y_min]
        scoref = float(score)

        # Only keep objects whose confidence exceeds the threshold
        if scoref > MIN_CONFIDENCE:
            detections.append(
                fo.Detection(
                    label=cls,
                    bounding_box=boxf,
                    confidence=scoref
                )
            )

    sample["object_faster_rcnn"] = fo.Detections(detections=detections)
    sample.save()

In [7]:
# CLIP Embedding
# Collect every .jpg keyframe (three levels below the keyframes root) and the
# name of every entry two levels below it (one entry per video).
all_keyframe = glob('../data/batch1/keyframes/*/*/*.jpg')
video_keyframe_dict = {}  # video name -> list of frame ids, filled in a later cell
# Keep only the last path component. os.path.basename understands the
# platform's separators, whereas splitting on '\\' only worked on Windows.
all_video = [
    os.path.basename(os.path.normpath(video_dir))
    for video_dir in glob('../data/batch1/keyframes/*/*')
]

In [8]:
# Group frame ids by video: {video name: [frame id, ...]}.
# The dict is rebuilt from scratch so that re-running this cell does not append
# duplicate frame ids, and paths are split with os.path rather than on '\\' so
# the cell works regardless of the platform's path separator.
video_keyframe_dict = {}
for keyframe_path in all_keyframe:
    stem, _ext = os.path.splitext(keyframe_path)
    video_dir, frame_id = os.path.split(stem)
    video_keyframe_dict.setdefault(os.path.basename(video_dir), []).append(frame_id)

In [9]:
# Sort each video's frame ids (string order); the sorted position is what gets
# paired with a row of the per-video feature array further down.
for frame_ids in video_keyframe_dict.values():
    frame_ids.sort()

In [10]:
# Build {video name: {frame id: feature row}} from one .npy file per video.
# Row i of <video>.npy is paired with the i-th frame id of that video's list.
# The path is assembled with os.path.join so it is valid on every platform
# (the previous hard-coded backslashes only resolved on Windows).
CLIP_FEATURE_DIR = os.path.join('..', 'data', 'batch1', 'clip-features-32')

embedding_dict = {}
for video in all_video:
    clip_path = os.path.join(CLIP_FEATURE_DIR, f'{video}.npy')
    features = np.load(clip_path)
    frame_ids = video_keyframe_dict[video]

    # Fail with an explicit message instead of an IndexError mid-loop.
    # Extra trailing rows are still ignored, as before.
    if len(features) < len(frame_ids):
        raise ValueError(
            f"{clip_path} has {len(features)} rows but video {video!r} "
            f"has {len(frame_ids)} keyframes"
        )

    embedding_dict[video] = {
        frame_id: features[row] for row, frame_id in enumerate(frame_ids)
    }

In [11]:
# One embedding per sample, looked up by (video, frame id) and collected in the
# order in which the dataset yields its samples.
clip_embeddings = [
    embedding_dict[sample['video']][sample['frameid']]
    for sample in dataset
]

In [12]:
# Compute a similarity index for the dataset from the embeddings gathered above,
# passing "img_sim" as its brain key.
fob.compute_similarity(
    dataset,
    brain_key="img_sim",
    embeddings=clip_embeddings,          # embeddings computed ahead of time
    model="clip-vit-base32-torch",       # model name recorded for later use
)

<fiftyone.brain.internal.core.sklearn.SklearnSimilarityIndex at 0x2039eda7f80>

In [13]:
# You need to have a supported version of umap-learn installed.
# A fixed seed is passed so the computed layout is reproducible across runs
# rather than changing every time the notebook is executed.
fob.compute_visualization(
    dataset,
    embeddings=clip_embeddings,
    brain_key="img_viz",
    seed=51,
)

Generating visualization...


  from .autonotebook import tqdm as notebook_tqdm


UMAP( verbose=True)
Mon Sep  2 09:05:55 2024 Construct fuzzy simplicial set
Mon Sep  2 09:05:55 2024 Finding Nearest Neighbors
Mon Sep  2 09:05:55 2024 Building RP forest with 10 trees
Mon Sep  2 09:05:59 2024 NN descent for 13 iterations
	 1  /  13
	 2  /  13
	 3  /  13
	 4  /  13
	 5  /  13
	Stopping threshold met -- exiting after 5 iterations
Mon Sep  2 09:06:08 2024 Finished Nearest Neighbor Search
Mon Sep  2 09:06:10 2024 Construct embedding


Epochs completed:   3%| ▎          13/500 [00:02]

	completed  0  /  500 epochs


Epochs completed:  13%| █▎         64/500 [00:02]

	completed  50  /  500 epochs


Epochs completed:  21%| ██▏        107/500 [00:03]

	completed  100  /  500 epochs


Epochs completed:  31%| ███        154/500 [00:04]

	completed  150  /  500 epochs


Epochs completed:  41%| ████       204/500 [00:05]

	completed  200  /  500 epochs


Epochs completed:  52%| █████▏     258/500 [00:06]

	completed  250  /  500 epochs


Epochs completed:  62%| ██████▏    308/500 [00:07]

	completed  300  /  500 epochs


Epochs completed:  71%| ███████    353/500 [00:08]

	completed  350  /  500 epochs


Epochs completed:  81%| ████████▏  407/500 [00:10]

	completed  400  /  500 epochs


Epochs completed:  94%| █████████▎ 468/500 [00:11]

	completed  450  /  500 epochs


Epochs completed: 100%| ██████████ 500/500 [00:11]

Mon Sep  2 09:06:22 2024 Finished embedding





<fiftyone.brain.visualization.VisualizationResults at 0x203a6ac3e60>

In [14]:
# Open the App for this session in a new browser tab
session.open_tab()

<IPython.core.display.Javascript object>