In [1]:
import numpy as np
import os, sys
import importlib
from tqdm.notebook import tqdm, trange
import pandas as pd
import json
import itertools
from IPython.display import display

# Import test handler
from FewShotTestHandler import FewShotTestHandler

# Most common classifier
from classifier.WeightedTextFewShotClassifier import WeightedTextFewShotClassifier

### Load VLM

Note: This notebook must be run in the conda environment that corresponds to the VLM selected in the cell below.

In [2]:
# Choose the VLM by name rather than by hand-editing `if True/False` toggles:
# exactly one model is loaded, and a bad choice fails here instead of leaving
# `vlm` undefined (or silently overwritten) for the cells below.
# The imports stay inside the branches so only the selected model's wrapper
# module is imported.
VLM_NAME = "univl"  # one of: "vttwins", "clip", "univl"

if VLM_NAME == "vttwins":
    from VTTWINS.wrapper import VTTWINS_SimilarityVLM
    vlm = VTTWINS_SimilarityVLM(reset_cache=False)
elif VLM_NAME == "clip":
    from CLIP.CLIPVLM import ClipVLM
    vlm = ClipVLM(reset_cache=False)
elif VLM_NAME == "univl":
    from UNIVL.wrapper import UniVL_SimilarityVLM
    vlm = UniVL_SimilarityVLM(reset_cache=False)
else:
    raise ValueError(
        f"Unknown VLM_NAME {VLM_NAME!r}; expected 'vttwins', 'clip', or 'univl'"
    )

/home/rob/vlm_benchmark/UNIVL/VideoFeatureExtractor
Weights from pretrained model not used in S3D: 
   text_module.word_embd.weight
   text_module.fc1.weight
   text_module.fc1.bias
   text_module.fc2.weight
   text_module.fc2.bias
/home/rob/vlm_benchmark/UNIVL/UniVL/modules/bert-base-uncased/vocab.txt


Stage-One:True, Stage-Two:False


### Test Handler

The test handler runs the few-shot tests and permanently saves their results.

In [3]:
# Single shared handler for this notebook: the cells below use it to fill the
# VLM cache, run the few-shot tests, and read accumulated results back through
# `test_handler.results`.
test_handler = FewShotTestHandler()

### Run Repeated Tests

In [4]:
# Experiment grid. The test loop runs one few-shot test per combination of
# (n_way, n_support, text_weight) drawn from the three lists below.
N_WAY_LIST = [5, 10]                              # values swept for `n_way`
N_SUPPORT_LIST = [0, 1, 2, 5, 10]                 # values swept for `n_support`; 0 is the zero-shot case
TEXT_WEIGHT_LIST = [0.1, 0.5, 1, 2, 5, 10, 20]    # values swept for the classifier's `text_weight`

# Fixed settings shared by every test in the grid.
N_QUERY = 1
N_EPISODES = 1000

# Dataset split file. The default is the original hard-coded location; set the
# VLM_BENCHMARK_SPLIT_PATH environment variable to run against a split stored
# elsewhere without editing the notebook.
DATASET_SPLIT_PATH = os.environ.get(
    "VLM_BENCHMARK_SPLIT_PATH",
    "/home/datasets/kinetics_100_split/val.txt",
)

In [5]:
# Fill the VLM cache for the chosen dataset split before any tests are run.
# NOTE(review): presumably this pre-computes the model's embeddings so the
# repeated tests below reuse cached values instead of re-decoding videos —
# confirm against FewShotTestHandler.fill_cache.
test_handler.fill_cache(vlm, DATASET_SPLIT_PATH)

  0%|          | 0/1200 [00:00<?, ?it/s]

Decoding video: /home/datasets/kinetics_100/065.baking_cookies/ZK2H5KlcHVM.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/ZK2H5KlcHVM.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/r9YmcA13hFE.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/r9YmcA13hFE.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/FreqqAT1YmU.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/FreqqAT1YmU.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/FWx9d8nPnJY.mp4
Decoding video: /home/datasets/kinetics_100/065.baking_cookies/FWx9d8nPnJY.mp4


In [None]:
# Live output slot that always shows the newest rows of the results table
disp = display(display_id=True)
disp.update(test_handler.results.tail(5))

# Build the parameter grid, dropping redundant zero-shot runs: when
# n_support == 0 only the text_weight == 1 configuration is kept.
param_list = [
    (n_way, n_support, text_weight)
    for n_way, n_support, text_weight in itertools.product(
        N_WAY_LIST, N_SUPPORT_LIST, TEXT_WEIGHT_LIST
    )
    if n_support != 0 or text_weight == 1
]

for n_way, n_support, text_weight in tqdm(param_list):
    classifier = WeightedTextFewShotClassifier(vlm, metric=None, text_weight=text_weight)

    test_handler.run_few_shot_test(
        classifier,
        DATASET_SPLIT_PATH,
        n_way=n_way,
        n_support=n_support,
        n_query=N_QUERY,
        n_episodes=N_EPISODES,
    )
    # Refresh the displayed table after each configuration completes
    disp.update(test_handler.results.tail(5))

Unnamed: 0,vlm_class,vlm.num_frames,vlm.path,vlm.sample_strat,classifier_class,classifier.metric,classifier.text_weight,dataset_split,n_way,n_support,n_query,n_episodes,accuracy
328,ClipVLM,1.0,openai/clip-vit-base-patch32,uniform,WeightedTextFewShotClassifier,COSINE,1.0,/home/datasets/kinetics_100_split/val.txt,10,10,1,1000,0.8144
329,ClipVLM,1.0,openai/clip-vit-base-patch32,uniform,WeightedTextFewShotClassifier,COSINE,2.0,/home/datasets/kinetics_100_split/val.txt,10,10,1,1000,0.8209
330,ClipVLM,1.0,openai/clip-vit-base-patch32,uniform,WeightedTextFewShotClassifier,COSINE,5.0,/home/datasets/kinetics_100_split/val.txt,10,10,1,1000,0.8351
331,ClipVLM,1.0,openai/clip-vit-base-patch32,uniform,WeightedTextFewShotClassifier,COSINE,10.0,/home/datasets/kinetics_100_split/val.txt,10,10,1,1000,0.8524
332,ClipVLM,1.0,openai/clip-vit-base-patch32,uniform,WeightedTextFewShotClassifier,COSINE,20.0,/home/datasets/kinetics_100_split/val.txt,10,10,1,1000,0.8617


  0%|          | 0/58 [00:00<?, ?it/s]