In [1]:
import numpy as np
import os, sys
import importlib
from tqdm.notebook import tqdm, trange
import pandas as pd

import torch
# Use the GPU when torch reports CUDA as available; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

from SimilarityVLM import SimilarityVLM
from dataset.dataset import FewShotTaskDataset, SequentialVideoDataset, SequentialCategoryNameDataset
from FewShotClassifier import FewShotClassifier

## Test Parameters

### Choose VLM to Test

Note: this notebook must be run in the conda environment that corresponds to the VLM chosen below.

In [2]:
# The package name "VT-TWINS" contains a hyphen, so it cannot appear in a regular
# `import` statement; importlib.import_module loads it from its string name instead.
VLM = importlib.import_module("VT-TWINS.wrapper").VTTWINS_SimilarityVLM

### Choose Dataset to Test

In [3]:
# Path to the split file passed to the dataset classes below.
# NOTE(review): this is an absolute, machine-specific path — adjust it for your environment.
DATASET_SPLIT_PATH = "/home/datasets/kinetics_100_split/test.txt"

### Choose Few-Shot Task Parameters

In [4]:
# Few-shot episode configuration; these values are passed to few_shot_accuracy in the "Run Test" cell.
N_WAY = 5                       # Number of categories to choose between in each task
N_SUPPORT = 10                  # Number of support (example) videos per category per task
N_QUERY = 1                     # Number of query (test) videos per category per task
N_EPISODES = 1000               # Number of few-shot tasks sampled in one iteration of the dataset

## VLM Setup

### Load VLM and Few-Shot Classifier

In [5]:
# Instantiate the VLM wrapper class selected above.
# NOTE(review): reset_cache=False presumably keeps any previously saved embedding cache
# instead of clearing it — confirm against the wrapper implementation.
vlm = VLM(reset_cache=False)
# Wrap the VLM in the few-shot classifier.
# NOTE(review): what metric=None selects is decided inside FewShotClassifier and is not
# visible from this notebook — confirm the default there.
classifier = FewShotClassifier(vlm, metric=None)

### Fill the Cache

In [6]:
# Walk over every video in the split and embed the ones the VLM cache does not hold yet.
video_dataset = SequentialVideoDataset(DATASET_SPLIT_PATH)

try:
    for vid_path in tqdm(video_dataset):
        # Already embedded on a previous run: nothing to do for this video.
        if vid_path in vlm.embed_cache:
            continue
        vlm.get_video_embeds(vid_path)
except KeyboardInterrupt:
    # Interrupting the cell is an accepted way to stop early; progress so far is
    # still written out by the `finally` clause below.
    pass
finally:
    vlm.save_cache()

  0%|          | 0/2400 [00:00<?, ?it/s]

  video = th.from_numpy(video)


## Test Setup

### Setup DataFrame for Saving Test Results

In [None]:
# CSV file that accumulates results across runs, and the column schema used for a new table.
TEST_RESULTS_PATH = "test_results.csv"
TEST_RESULTS_COLUMNS = [
    "vlm_class", "classifier_class", "dataset_split",
    "n_way", "n_support", "n_query", "n_episodes",
    "accuracy",
]

# Continue from an existing results file when there is one; otherwise start an empty table.
test_results = (
    pd.read_csv(TEST_RESULTS_PATH)
    if os.path.exists(TEST_RESULTS_PATH)
    else pd.DataFrame(columns=TEST_RESULTS_COLUMNS)
)

### Testing Function

In [None]:
def few_shot_accuracy(classifier: FewShotClassifier, dataset_split_path: str,
                      n_way: int, n_support: int, n_query: int = 1, n_episodes: int = 1000,
                      test_results_df: pd.DataFrame = None) -> float:
    """Measure few-shot classification accuracy over a set of sampled tasks.

    A FewShotTaskDataset is built from the given split and task parameters. For each
    task it yields, the classifier predicts a category index for every query video,
    and the prediction for category row ``i`` counts as correct when it equals ``i``.

    Args:
        classifier: Classifier whose ``predict(category_names, support_vid_paths,
            query_vid_paths)`` method is called once per task.
        dataset_split_path: Path to the dataset split file passed to FewShotTaskDataset.
        n_way: Number of categories per task.
        n_support: Number of support videos per category. When 0, ``None`` is passed
            to the classifier in place of the support paths.
        n_query: Number of query videos per category.
        n_episodes: Number of tasks requested from the dataset.
        test_results_df: Optional results table. When given, one row describing this
            run is appended to it IN PLACE, so the caller's DataFrame is updated.

    Returns:
        The fraction of query videos that were classified correctly.
    """
    # Load dataset to generate tasks with the desired params
    dataset = FewShotTaskDataset(dataset_split_path, n_episodes, n_way, n_support, n_query)

    # Column vector [0, 1, ..., n_way - 1]: the expected label for each category row.
    # NOTE(review): this assumes classifier.predict returns an array broadcastable
    # against shape (n_way, 1), e.g. (n_way, n_query) — confirm against FewShotClassifier.
    expected_labels = np.arange(n_way)[:, None]

    correct_predictions = 0
    total_queries = 0
    for vid_paths, category_names in tqdm(dataset):

        # Per category row, the first n_support columns are support videos and the
        # remaining columns are query videos.
        query_vid_paths = vid_paths[:, n_support:]
        if n_support > 0:
            support_vid_paths = vid_paths[:, :n_support]
        else:
            support_vid_paths = None

        query_predictions = classifier.predict(category_names, support_vid_paths, query_vid_paths)

        correct_predictions += np.sum(query_predictions == expected_labels)
        total_queries += n_way * n_query

    accuracy = float(correct_predictions / total_queries)

    # Save into test_results df if existing
    if test_results_df is not None:
        df_row = {
            "vlm_class": classifier.vlm.__class__.__name__,
            "classifier_class": classifier.__class__.__name__,
            "dataset_split": dataset_split_path,
            "n_way": n_way,
            "n_support": n_support,
            "n_query": n_query,
            "n_episodes": n_episodes,
            "accuracy": accuracy
        }
        # Append in place (setting with enlargement) so the caller's DataFrame actually
        # receives the row; previously the row was built and then discarded. Values are
        # laid out in the table's own column order; any column this function does not
        # know about is filled with None.
        # NOTE(review): using len() as the new label assumes the table has the default
        # 0..n-1 index (true for a fresh DataFrame or one loaded with pd.read_csv).
        test_results_df.loc[len(test_results_df)] = [
            df_row.get(column) for column in test_results_df.columns
        ]

    return accuracy

## Run Test

In [None]:
# Run the evaluation with the parameters configured above. As the last expression in
# the cell, the returned accuracy is displayed as the cell output.
few_shot_accuracy(classifier, DATASET_SPLIT_PATH, n_way=N_WAY, n_support=N_SUPPORT, n_query=N_QUERY, n_episodes=N_EPISODES, test_results_df=test_results)

### Save Updated Test Results

In [None]:
# Write the results table to TEST_RESULTS_PATH; index=False keeps the row index out of the CSV.
test_results.to_csv(TEST_RESULTS_PATH, index=False)