In [1]:
import os
import json

# Directory that the cells below scan for evaluation result files (each one is read as JSON).
folder_path = "../tmp/evaluation" 

# Zeroshot Retrieval

## Crossmodal3600

In [2]:
# Summarise the zero-shot retrieval results of one run on Crossmodal3600.
# A result file is selected when its name contains all three substrings below.
# NOTE(review): plain substring matching means "stage_1" would also match a
# name containing e.g. "stage_10" -- confirm the naming scheme rules that out.
data_name = "crossmodal3600"
model_name = "google_siglip2-base-patch16-256_no_dual_projection"
stage_name = "stage_1"

# List and filter the directory a single time; both groups below are derived
# from this one snapshot.
matching_files = [
    file
    for file in os.listdir(folder_path)
    if data_name in file and model_name in file and stage_name in file
]

# Parse the step number out of "...checkpoint-<step>_hf..." once per file, so
# the sort key and the dictionary key always agree. Sorting on the number only
# keeps the listing order for files that share a step (stable sort).
checkpoint_files = sorted(
    (
        (int(file.split("checkpoint-")[1].split("_hf")[0]), file)
        for file in matching_files
        if "checkpoint" in file
    ),
    key=lambda labelled: labelled[0],
)
# Files without "checkpoint" in their name are stored under the key "final".
final_files = [("final", file) for file in matching_files if "checkpoint" not in file]
labelled_files = checkpoint_files + final_files

# Checkpoint files in ascending step order, followed by the "final" file(s).
files = [file for _, file in labelled_files]
print(len(files))

# Map checkpoint label (int step or "final") -> metrics dict loaded from JSON.
all_results = {}
for checkpoint, file in labelled_files:
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(os.path.join(folder_path, file), encoding="utf-8") as f:
        result = json.load(f)

    all_results[checkpoint] = result

all_top_k = [1, 5, 10]
top_n = 5  # number of best checkpoints reported per metric
best_results = {}

# For every recall@k metric (both retrieval directions) keep the top_n
# checkpoints, best first. Ties keep the insertion order of all_results.
for k in all_top_k:
    for metric in (f"text2image_recall@{k}", f"image2text_recall@{k}"):
        scored = [
            {"checkpoint": checkpoint, "result": result[metric]}
            for checkpoint, result in all_results.items()
        ]
        best_results[metric] = sorted(scored, key=lambda entry: entry["result"], reverse=True)[:top_n]

# Report each ranked checkpoint together with all of its other metrics.
for metric, top_results in best_results.items():
    print("#" * 30)
    print(f"Metric: {metric}")
    for rank, best_result in enumerate(top_results, start=1):
        print(f"Best checkpoint {rank}: {best_result['checkpoint']}")
        print(f"Best result {rank}: {best_result['result']}")
        print("Other results:")
        print(all_results[best_result['checkpoint']])
        print()

164
##############################
Metric: text2image_recall@1
Best checkpoint 1: 356
Best result 1: 0.6725000143051147
Other results:
{'text2image_recall@1': 0.6725000143051147, 'image2text_recall@1': 0.659166693687439, 'text2image_recall@5': 0.8936111330986023, 'image2text_recall@5': 0.8974999785423279, 'text2image_recall@10': 0.9416666626930237, 'image2text_recall@10': 0.9397222399711609}

Best checkpoint 2: 534
Best result 2: 0.6708333492279053
Other results:
{'text2image_recall@1': 0.6708333492279053, 'image2text_recall@1': 0.6555555462837219, 'text2image_recall@5': 0.8911111354827881, 'image2text_recall@5': 0.8966666460037231, 'text2image_recall@10': 0.941944420337677, 'image2text_recall@10': 0.9427777528762817}

Best checkpoint 3: 267
Best result 3: 0.6688888669013977
Other results:
{'text2image_recall@1': 0.6688888669013977, 'image2text_recall@1': 0.659166693687439, 'text2image_recall@5': 0.8869444727897644, 'image2text_recall@5': 0.8927778005599976, 'text2image_recall@10': 0.9

In [3]:
# Display every metric stored for checkpoint 8900 in `all_results` (the dict built by the previous cell, keyed by integer checkpoint step).
all_results[8900]

{'text2image_recall@1': 0.6461111307144165,
 'image2text_recall@1': 0.6252777576446533,
 'text2image_recall@5': 0.8838889002799988,
 'image2text_recall@5': 0.8772222399711609,
 'text2image_recall@10': 0.9350000023841858,
 'image2text_recall@10': 0.9344444274902344}

## KTVIC

In [4]:
# Summarise the zero-shot retrieval results of one run on KTVIC.
# A result file is selected when its name contains all three substrings below.
# NOTE(review): plain substring matching means "stage_1" would also match a
# name containing e.g. "stage_10" -- confirm the naming scheme rules that out.
data_name = "KTVIC"
model_name = "google_siglip2-base-patch16-256_no_dual_projection"
stage_name = "stage_1"

# List and filter the directory a single time; both groups below are derived
# from this one snapshot.
matching_files = [
    file
    for file in os.listdir(folder_path)
    if data_name in file and model_name in file and stage_name in file
]

# Parse the step number out of "...checkpoint-<step>_hf..." once per file, so
# the sort key and the dictionary key always agree. Sorting on the number only
# keeps the listing order for files that share a step (stable sort).
checkpoint_files = sorted(
    (
        (int(file.split("checkpoint-")[1].split("_hf")[0]), file)
        for file in matching_files
        if "checkpoint" in file
    ),
    key=lambda labelled: labelled[0],
)
# Files without "checkpoint" in their name are stored under the key "final".
final_files = [("final", file) for file in matching_files if "checkpoint" not in file]
labelled_files = checkpoint_files + final_files

# Checkpoint files in ascending step order, followed by the "final" file(s).
files = [file for _, file in labelled_files]
print(len(files))

# Map checkpoint label (int step or "final") -> metrics dict loaded from JSON.
all_results = {}
for checkpoint, file in labelled_files:
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(os.path.join(folder_path, file), encoding="utf-8") as f:
        result = json.load(f)

    all_results[checkpoint] = result

all_top_k = [1, 5, 10]
top_n = 5  # number of best checkpoints reported per metric
best_results = {}

# For every recall@k metric (both retrieval directions) keep the top_n
# checkpoints, best first. Ties keep the insertion order of all_results.
for k in all_top_k:
    for metric in (f"text2image_recall@{k}", f"image2text_recall@{k}"):
        scored = [
            {"checkpoint": checkpoint, "result": result[metric]}
            for checkpoint, result in all_results.items()
        ]
        best_results[metric] = sorted(scored, key=lambda entry: entry["result"], reverse=True)[:top_n]

# Report each ranked checkpoint together with all of its other metrics.
for metric, top_results in best_results.items():
    print("#" * 30)
    print(f"Metric: {metric}")
    for rank, best_result in enumerate(top_results, start=1):
        print(f"Best checkpoint {rank}: {best_result['checkpoint']}")
        print(f"Best result {rank}: {best_result['result']}")
        print("Other results:")
        print(all_results[best_result['checkpoint']])
        print()

164
##############################
Metric: text2image_recall@1
Best checkpoint 1: 890
Best result 1: 0.35985663533210754
Other results:
{'text2image_recall@1': 0.35985663533210754, 'image2text_recall@1': 0.4695340394973755, 'text2image_recall@5': 0.65663081407547, 'image2text_recall@5': 0.7150537371635437, 'text2image_recall@10': 0.7666666507720947, 'image2text_recall@10': 0.8046594858169556}

Best checkpoint 2: 445
Best result 2: 0.35949820280075073
Other results:
{'text2image_recall@1': 0.35949820280075073, 'image2text_recall@1': 0.4677419364452362, 'text2image_recall@5': 0.6519713401794434, 'image2text_recall@5': 0.698924720287323, 'text2image_recall@10': 0.7627240419387817, 'image2text_recall@10': 0.815412163734436}

Best checkpoint 3: 1068
Best result 3: 0.3584229350090027
Other results:
{'text2image_recall@1': 0.3584229350090027, 'image2text_recall@1': 0.46415770053863525, 'text2image_recall@5': 0.6544802784919739, 'image2text_recall@5': 0.7114695310592651, 'text2image_recall@10'

# Zeroshot Classification

## 30VNFoods_test

In [6]:
# Summarise the zero-shot classification results of one run on 30VNFoods_test.
# A result file is selected when its name contains all three substrings below.
# NOTE(review): plain substring matching means "stage_1" would also match a
# name containing e.g. "stage_10" -- confirm the naming scheme rules that out.
data_name = "30VNFoods_test"
model_name = "google_siglip2-base-patch16-256_no_dual_projection"
stage_name = "stage_1"

# List and filter the directory a single time; both groups below are derived
# from this one snapshot.
matching_files = [
    file
    for file in os.listdir(folder_path)
    if data_name in file and model_name in file and stage_name in file
]

# Parse the step number out of "...checkpoint-<step>_hf..." once per file, so
# the sort key and the dictionary key always agree. Sorting on the number only
# keeps the listing order for files that share a step (stable sort).
checkpoint_files = sorted(
    (
        (int(file.split("checkpoint-")[1].split("_hf")[0]), file)
        for file in matching_files
        if "checkpoint" in file
    ),
    key=lambda labelled: labelled[0],
)
# Files without "checkpoint" in their name are stored under the key "final".
final_files = [("final", file) for file in matching_files if "checkpoint" not in file]
labelled_files = checkpoint_files + final_files

# Checkpoint files in ascending step order, followed by the "final" file(s).
files = [file for _, file in labelled_files]
print(len(files))

# Map checkpoint label (int step or "final") -> metrics dict loaded from JSON.
all_results = {}
for checkpoint, file in labelled_files:
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(os.path.join(folder_path, file), encoding="utf-8") as f:
        result = json.load(f)

    all_results[checkpoint] = result

metrics = ["acc1", "acc5", "mean_per_class_recall"]
top_n = 10  # number of best checkpoints reported per metric
best_results = {}

# For every classification metric keep the top_n checkpoints, best first.
# Ties keep the insertion order of all_results.
for metric in metrics:
    scored = [
        {"checkpoint": checkpoint, "result": result[metric]}
        for checkpoint, result in all_results.items()
    ]
    best_results[metric] = sorted(scored, key=lambda entry: entry["result"], reverse=True)[:top_n]

# Report each ranked checkpoint together with all of its other metrics.
for metric, top_results in best_results.items():
    print("#" * 30)
    print(f"Metric: {metric}")
    for rank, best_result in enumerate(top_results, start=1):
        print(f"Best checkpoint {rank}: {best_result['checkpoint']}")
        print(f"Best result {rank}: {best_result['result']}")
        print("Other results:")
        print(all_results[best_result['checkpoint']])
        print()

164
##############################
Metric: acc1
Best checkpoint 1: 267
Best result 1: 0.7365079365079366
Other results:
{'acc1': 0.7365079365079366, 'acc5': 0.9674603174603175, 'mean_per_class_recall': 0.7254963482747921}

Best checkpoint 2: 178
Best result 2: 0.7357142857142858
Other results:
{'acc1': 0.7357142857142858, 'acc5': 0.9698412698412698, 'mean_per_class_recall': 0.7228467367116095}

Best checkpoint 3: 445
Best result 3: 0.7353174603174604
Other results:
{'acc1': 0.7353174603174604, 'acc5': 0.9678571428571429, 'mean_per_class_recall': 0.7258659825549283}

Best checkpoint 4: 356
Best result 4: 0.7351190476190477
Other results:
{'acc1': 0.7351190476190477, 'acc5': 0.9678571428571429, 'mean_per_class_recall': 0.724580270596356}

Best checkpoint 5: 89
Best result 5: 0.7339285714285714
Other results:
{'acc1': 0.7339285714285714, 'acc5': 0.9670634920634921, 'mean_per_class_recall': 0.7197667001838898}

Best checkpoint 6: 534
Best result 6: 0.7325396825396825
Other results:
{'acc1'