# AIC23

Load dataset and launch app

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
import numpy as np

# Collect every JPEG that sits one directory level below ./data/keyframes
# into a dataset (no explicit name, no tags).
dataset = fo.Dataset.from_images_patt(
    "./data/keyframes/*/*.jpg",
    name=None,
    tags=None,
)

# Open the App for this dataset (desktop mode off) and keep the cell
# waiting on the session.
session = fo.launch_app(dataset, desktop=False)
session.wait()

In [None]:
import os
import csv
import pandas as pd

# Map each video id (file name up to its first dot) to the contents of its
# keyframe-mapping CSV, stored as {column: {row index: value}}.
info = {}
for entry in os.scandir("map-keyframes"):
    # Entries whose name starts with a dot are skipped.
    if entry.name.startswith("."):
        continue
    video_key = entry.name.split(".")[0]
    info[video_key] = pd.read_csv(entry.path).to_dict()

In [None]:
import json

# Attach keyframe metadata (from `info`) and object detections (from the
# per-frame JSON files under ./objects) to every sample, saving each one.
for sample in dataset:
    filepath = sample["filepath"]
    # Paths mentioning "checkpoint" are not processed.
    if "checkpoint" in filepath:
        continue
    print(f"\r{filepath} is being processed", end="", flush=True)

    # The last two path components are <video>/<frame file>; the ids are the
    # parts of those names before the first dot.
    videoId, frameId = filepath.split("/")[-2:]
    videoId = videoId.split(".")[0]
    frameId = frameId.split(".")[0]
    object_file = f"./objects/{videoId}/{frameId}.json"

    # The mapping table is looked up at (frame number - 1).
    row = int(frameId) - 1
    mapping = info[videoId]
    sample["videoId"] = videoId
    sample["n"] = str(mapping["n"][row])
    sample["frameId"] = str(mapping["frame_idx"][row])
    sample["pts_time"] = str(mapping["pts_time"][row])

    # Use a context manager so the file handle is closed for every sample
    # instead of being leaked once per iteration.
    with open(object_file) as f:
        data = json.load(f)

    detections = []
    for idx, label in enumerate(data["detection_class_entities"]):
        # Convert once: the score is compared numerically below and is also
        # stored as the detection's confidence.
        score = float(data["detection_scores"][idx])
        if score < 0.5:
            continue
        # NOTE(review): the boxes are passed through unchanged; confirm that
        # the JSON already uses the box layout FiftyOne expects.
        detections.append(
            fo.Detection(
                label=label,
                bounding_box=data["detection_boxes"][idx],
                confidence=score,
            )
        )
    sample["prediction"] = fo.Detections(detections=detections)
    sample.save()

### Embeddings of the videos are provided

Compute embeddings (if not provided)

In [None]:
import os

# Create the output directory before the embedding pass so a missing
# ./embeddings folder cannot cause the computed embeddings to be lost.
os.makedirs("./embeddings", exist_ok=True)

# Embed every sample with the "clip-vit-base32-torch" zoo model and store the
# result at ./embeddings/keyframes.npy.
model = foz.load_zoo_model("clip-vit-base32-torch")
embeddings = dataset.compute_embeddings(model)
with open("./embeddings/keyframes.npy", "wb") as f:
    np.save(f, embeddings)

Load embeddings

In [None]:
# Read the embeddings array back from ./embeddings/keyframes.npy.
embeddings = np.load(file="./embeddings/keyframes.npy")

Compute visualization using embeddings

In [None]:
# Compute a visualization of the dataset from the embeddings above, using a
# fixed seed (51) and registering the run under the brain key "img_viz".
results = fob.compute_visualization(
    dataset, embeddings=embeddings, seed=51, brain_key="img_viz"
)

Compute a similarity index (used to sort by similarity or to query by text)

In [None]:
# Compute similarity for the dataset from the embeddings above, naming the
# "clip-vit-base32-torch" model and registering the run under "img_sim".
image_index = fob.compute_similarity(
    dataset,
    brain_key="img_sim",
    model="clip-vit-base32-torch",
    embeddings=embeddings,
)