# Held-out Verb/Noun Pair Evaluation

## Semantic Similarity Metrics

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Shot counts that were evaluated; they form the x-axis of every series below.
shots = [0, 1, 2, 4, 8, 12, 16]

# EILEV-trained BLIP-2 OPT-2.7B: metric name -> one score per entry in `shots`.
eilev_blip2_opt_2_7b = {
    "STS-CE": [0.222, 0.5117, 0.51, 0.5939, 0.6602, 0.6775, 0.6814],
    "STS-BE": [0.3672, 0.5864, 0.5978, 0.6727, 0.7311, 0.7466, 0.7512],
    # 'BERTScore-F1': [0.5451, 0.6229, 0.6323, 0.639, 0.6444, 0.6449, 0.6448],
}

# EILEV Flan-T5-xl: placeholder with the same metric keys; no scores filled in.
eilev_flan_t5_xl = {
    "STS-CE": [],
    "STS-BE": [],
    # 'BERTScore-F1': [],
}

# ---- Figure ----
plt.figure(figsize=(10, 5))

# Draw one line per metric for the BLIP-2 OPT-2.7B model.
for metric, values in eilev_blip2_opt_2_7b.items():
    plt.plot(shots, values, marker="o", label=f"EILEV BLIP-2 OPT-2.7B {metric}")

# The Flan-T5-xl series stay disabled while `eilev_flan_t5_xl` holds empty lists.
# for metric, values in eilev_flan_t5_xl.items():
#     plt.plot(shots, values, label=f"EILEV BLIP-2 Flan-T5-xl {metric}", marker="s")

# The title is lifted (y=1.25), presumably so it clears the legend that is
# anchored just above the axes further down.
plt.title(
    "Held-out Verb/Noun Pair Eval: Semantic-similarity-based Metrics",
    fontweight="bold",
    y=1.25,
)

# Axis decoration: a tick at every evaluated shot count, x-range from 0 to the
# largest shot count.
plt.grid(True)
plt.ylabel("Score")
plt.xlabel("Shots")
plt.xticks(shots, labels=list(map(str, shots)))
plt.xlim(left=0, right=max(shots))

# Legend goes above the plot area: its lower-center point is pinned to the
# top-center of the axes.
plt.legend(ncol=3, loc="lower center", bbox_to_anchor=(0.5, 1))
plt.show()

## N-gram Metrics

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Shot counts that were evaluated; they form the x-axis of every series below.
shots = [0, 1, 2, 4, 8, 12, 16]

# EILEV-trained BLIP-2 OPT-2.7B: metric name -> one score per entry in `shots`.
eilev_blip2_opt_2_7b = {
    "RougeL": [0.1916, 0.5245, 0.5601, 0.6069, 0.6424, 0.6547, 0.6606],
    "BLEU": [0.01493, 0.1183, 0.2028, 0.2526, 0.2888, 0.3037, 0.3145],
}

# EILEV Flan-T5-xl: placeholder with the same metric keys; no scores filled in.
eilev_flan_t5_xl = {
    "RougeL": [],
    "BLEU": [],
}

# ---- Figure ----
plt.figure(figsize=(10, 5))

# Draw one line per metric for the BLIP-2 OPT-2.7B model.
for metric, values in eilev_blip2_opt_2_7b.items():
    plt.plot(shots, values, marker="o", label=f"EILEV BLIP-2 OPT-2.7B {metric}")

# The Flan-T5-xl series stay disabled while `eilev_flan_t5_xl` holds empty lists.
# for metric, values in eilev_flan_t5_xl.items():
#     plt.plot(shots, values, label=f"EILEV BLIP-2 Flan-T5-xl {metric}", marker="s")

# The title is lifted (y=1.25), presumably so it clears the legend that is
# anchored just above the axes further down.
plt.title(
    "Held-out Verb/Noun Pair Eval: N-gram-based Metrics",
    fontweight="bold",
    y=1.25,
)

# Axis decoration: a tick at every evaluated shot count, x-range from 0 to the
# largest shot count.
plt.grid(True)
plt.ylabel("Score")
plt.xlabel("Shots")
plt.xticks(shots, labels=list(map(str, shots)))
plt.xlim(left=0, right=max(shots))

# Legend goes above the plot area: its lower-center point is pinned to the
# top-center of the axes.
plt.legend(ncol=3, loc="lower center", bbox_to_anchor=(0.5, 1))
plt.show()