### Imports

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel
from peft import get_peft_model, LoraConfig, TaskType
from PIL import Image
import random
import pandas as pd
import os
import ast
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np
print(np.__version__)

1.26.4


In [3]:
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv("OPENROUTER_API_KEY")

In [101]:
# from openai import OpenAI

# client = OpenAI(
#     base_url="https://openrouter.ai/api/v1",
#     api_key=api_key
# )

# def gloss_text(text):
#     try:
#         response = client.chat.completions.create(
#             model="deepseek/deepseek-r1-zero:free",
#             messages=[
#                 {"role": "system", "content": "You convert idiomatic or ambiguous English sentences into clear literal gloss sentences."},
#                 {"role": "user", "content": f"Gloss this sentence: {text}"}
#             ]
#         )
#         print(response)
#         return response.choices[0].message.content.strip()
#     except Exception as e:
#         print(response)
#         print(f"Glossing failed: {e}")
#         return text  # fallback to original

### API calls to DeepSeek

In [None]:
import requests
import json
import time

def gloss_text(text, max_retries=3, retry_delay=2):
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "deepseek/deepseek-r1-zero:free",
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a language expert. Your task is to convert idiomatic or figurative language into literal explanations.\n"
                    "- If the sentence contains an idiom or figurative expression, rewrite it by replacing those parts with clear, literal meanings.\n"
                    "- If it doesn't, return the sentence unchanged.\n"
                    "DO NOT explain your reasoning or provide commentary. ONLY return the final rewritten sentence."
                )
            },
            {
                "role": "user",
                "content": text
            }
        ],
        "reasoning": {
            "effort": "low",
            "exclude": True 
        }
    }

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload))
            result = response.json()
            content = result["choices"][0]["message"]["content"].strip()

            if content:
                return content
            else:
                print(f"Attempt {attempt}: Empty content received. Retrying...")
                time.sleep(retry_delay)

        except Exception as e:
            print(f"Attempt {attempt}: Glossing failed due to error: {e}")
            time.sleep(retry_delay)

    print("Max retries reached. Returning original text.")
    return text



### For glossing the sentences

In [52]:
import re

def preprocess_gloss(df):
    gloss_cache = {}
    for _, row in df.iterrows():
        if row["sentence"] not in gloss_cache:
            sentence = row["sentence"]
            #gloss = gloss_idiomatic_sentence(sentence)
            gloss = gloss_text(sentence)
            # Extract from \boxed{...}
            match = re.search(r"\\boxed\{(.+?)\}", gloss)
            extracted = match.group(1).strip() if match else gloss.strip()
            #print(extracted)
            gloss_cache[sentence] = extracted if extracted != "" else sentence
            print(f"\n Sentence: {sentence}\n Gloss: {gloss_cache[sentence]}\n")
    return gloss_cache

In [111]:
print(gloss_text("Its raining cats and dogs"))

new one
ChatCompletion(id='gen-1745806445-LfisyYGvUtLNSgajdaso', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning='To solve for whether the sentence contains an idiom or figurative expression, let us first identify any part of the sentence that might be an idiom or a figurative expression. \n\nThe sentence given is:\n"He moved from the 10 pm slot to his lower profile life as a DJ on Radio Norwich\'s graveyard shift."\n\nThe part of the sentence that seems like a potential idiom or figurative expression is "graveyard shift." \n\nThe term "graveyard shift" typically refers to a work shift that takes place during the late night to early morning hours, usually from midnight until 8 a.m. It is called a "graveyard shift" because it is a time when most people are asleep, which makes it a quiet and often less desirable shift to wo

### Load datasets

In [14]:
train_df = pd.read_csv("train/subtask_a_train.tsv", sep='\t')
dev_df = pd.read_csv("dev/subtask_a_dev.tsv", sep='\t')
test_df = pd.read_csv("test/subtask_a_test.tsv", sep='\t')

### The following code creates gloss sentences for train, dev, test using DeepSeek

In [394]:
# train_df_1 = train_df[0:30]
# train_df_2 = train_df[30:60]
# train_df_3 = train_df[60:]

# gloss_sentences_train_1 = preprocess_gloss(train_df_1)
# gloss_sentences_train_2 = preprocess_gloss(train_df_2)
# gloss_sentences_train_3 = preprocess_gloss(train_df_3)

# with open("gloss_sentences_train1.json", "w") as file:
#     json.dump(gloss_sentences_train_1, file, indent=4)

# with open("gloss_sentences_train2.json", "w") as file:
#     json.dump(gloss_sentences_train_2, file, indent=4)

# with open("gloss_sentences_train3.json", "w") as file:
#     json.dump(gloss_sentences_train_3, file, indent=4)

# with open('gloss_sentences_train1.json', 'r') as f:
#     data1 = json.load(f)

# with open('gloss_sentences_train2.json', 'r') as f:
#     data2 = json.load(f)

# with open('gloss_sentences_train3.json', 'r') as f:
#     data3 = json.load(f)

# train_data = {**data1, **data2, **data3}

# with open('gloss_sentences_train.json', 'w') as file:
#     json.dump(train_data, file, indent=4)

# gloss_sentences_dev = preprocess_gloss(dev_df)

# with open("gloss_sentences_dev.json", "w") as file:
#     json.dump(gloss_sentences_dev, file, indent=4)

# gloss_sentences_test = preprocess_gloss(test_df)

# with open("gloss_sentences_test.json", "w") as file:
#     json.dump(gloss_sentences_test, file, indent=4)

### Opening the files that have stored the gloss sentences

In [4]:
import json
with open("gloss_sentences_train.json", "r") as file:
    gloss_cache_train = json.load(file)

with open("gloss_sentences_dev.json", "r") as file:
    gloss_cache_dev = json.load(file)

with open("gloss_sentences_test.json", "r") as file:
    gloss_cache_test = json.load(file)

### Create Dataset

In [5]:
class TripletDataset(Dataset):
    def __init__(self, df, gloss_cache = None):
        self.anchor_positive_negative_triplets = []
        self.gloss_cache = gloss_cache

        for index, row in df.iterrows():
            expected_order = ast.literal_eval(row["expected_order"])
            for i in range(1, 5):
                if self.gloss_cache is not None:
                    sentence = gloss_cache[row["sentence"]]
                else:
                    sentence = row["sentence"]
                self.anchor_positive_negative_triplets.append((
                    sentence,
                    os.path.join("train", row["compound"].replace("'s", "_s"), expected_order[0]),
                    os.path.join("train", row["compound"].replace("'s", "_s"), expected_order[i])
                ))

    def __len__(self):
        return len(self.anchor_positive_negative_triplets)

    def __getitem__(self, idx):
        anchor_text, pos_img_path, neg_img_path = self.anchor_positive_negative_triplets[idx]
        pos_img = Image.open(pos_img_path).convert('RGB')
        neg_img = Image.open(neg_img_path).convert('RGB')
        return (anchor_text, pos_img, neg_img)

### Triplet Loss function using cosine similarity

In [6]:
def triplet_loss_cosine_similarity(anchor_embedding, positive_embedding, negative_embedding, margin=0.3):
    pos_sim = torch.nn.functional.cosine_similarity(anchor_embedding, positive_embedding)
    neg_sim = torch.nn.functional.cosine_similarity(anchor_embedding, negative_embedding)
    loss = torch.relu(margin + neg_sim - pos_sim).mean()
    return loss

### Triplet Loss function using euclidean distance

In [7]:
def triplet_loss_euclidean_distance(anchor_embedding, positive_embedding, negative_embedding, margin=0.3):
    pos_dist = torch.nn.functional.pairwise_distance(anchor_embedding, positive_embedding, p=2)
    neg_dist = torch.nn.functional.pairwise_distance(anchor_embedding, negative_embedding, p=2)
    loss = torch.relu(pos_dist - neg_dist + margin).mean()
    return loss

### Function to use the dataloader

In [8]:
def train_collate_fn(batch):
    texts = [item[0] for item in batch]
    pos_images = [item[1] for item in batch]
    neg_images = [item[2] for item in batch]

    inputs_pos = processor(text=texts, images=pos_images, return_tensors='pt', padding=True, truncation=True)
    inputs_neg = processor(text=texts, images=neg_images, return_tensors='pt', padding=True, truncation=True)
    return inputs_pos, inputs_neg

### Function to train

In [21]:
from tqdm import tqdm


def train(model, dataset, use_cosine=True, patience=3, max_epochs=25, model_name="model"):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=train_collate_fn)

    best_dev_ndcg = 0
    patience_counter = 0
    best_path = f"{model_name}_best.pt"

    for epoch in range(max_epochs):
        model.train()
        total_loss = 0
        pbar = tqdm(loader, desc=f"Epoch {epoch + 1}", leave=False)

        for inputs_pos, inputs_neg in pbar:
            outputs_pos = model(**inputs_pos)
            outputs_neg = model(**inputs_neg)

            anchor_emb = outputs_pos.text_embeds
            pos_emb = outputs_pos.image_embeds
            neg_emb = outputs_neg.image_embeds

            if use_cosine:
                loss = triplet_loss_cosine_similarity(anchor_emb, pos_emb, neg_emb)
            else:
                loss = triplet_loss_euclidean_distance(anchor_emb, pos_emb, neg_emb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pbar.set_postfix({"loss": f"{loss.item():.4f}"})

        avg_loss = total_loss / len(loader)
        train_ndcg = calculate_ndcg_score(train_df, "train", model, use_cosine, gloss_cache_train)
        dev_ndcg = calculate_ndcg_score(dev_df, "dev", model, use_cosine, gloss_cache_dev)

        print(f"Epoch {epoch + 1} - Avg Loss: {avg_loss:.4f} - Train NDCG: {train_ndcg:.4f} - Dev NDCG: {dev_ndcg:.4f}")

        # Early stopping logic
        if dev_ndcg > best_dev_ndcg:
            best_dev_ndcg = dev_ndcg
            patience_counter = 0
            torch.save(model.state_dict(), best_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1} (no improvement in {patience} epochs)")
                break

    return best_path

### Functions to calculate metrics

In [10]:
def dcg(relevances):
    relevances = np.asfarray(relevances)
    score = relevances[0]
    for i in range(1, len(relevances)):
        score += relevances[i] / np.log2(i + 2)
    return score

def ndcg_score(ideal_ranking, predicted_ranking):
    image_to_relevance_score = {}
    
    for i in range(0, len(ideal_ranking)):
        image_to_relevance_score[ideal_ranking[i]] = len(ideal_ranking) - i 

    predicted_relevance = []
    ideal_relevance = []
    
    for index in range(0, len(ideal_ranking)):
        ideal_relevance.append(image_to_relevance_score[ideal_ranking[index]])
        predicted_relevance.append(image_to_relevance_score[predicted_ranking[index]])

    dcg_val = dcg(predicted_relevance)
    idcg_val = dcg(ideal_relevance)

    return dcg_val / idcg_val

def get_predicted_ranking_cosine(model, image2image_paths, text):
    predicted_ranking = []

    for image_name, image_path in image2image_paths.items():
        img = Image.open(image_path).convert('RGB')
        inputs = processor(text=text, images=img, return_tensors='pt', padding=True, truncation=True)
        outputs = model(**inputs)
        predicted_ranking.append((image_name, 
                                  torch.nn.functional.cosine_similarity(outputs.text_embeds, 
                                                                        outputs.image_embeds).squeeze()))

    predicted_ranking = reversed(sorted(predicted_ranking, key=lambda x:x[1]))
    predicted_ranking = [x[0] for x in predicted_ranking]
    
    return predicted_ranking

def get_predicted_ranking_euclidean(model, image2image_paths, text):
    predicted_ranking = []

    for image_name, image_path in image2image_paths.items():
        img = Image.open(image_path).convert('RGB')
        inputs = processor(text=text, images=img, return_tensors='pt', padding=True, truncation=True)
        outputs = model(**inputs)
        predicted_ranking.append((image_name, 
                                  torch.nn.functional.pairwise_distance(outputs.text_embeds, 
                                                                                   outputs.image_embeds,
                                                                                   p=2).squeeze()))
    predicted_ranking = sorted(predicted_ranking, key=lambda x:x[1])
    predicted_ranking = [x[0] for x in predicted_ranking]
    
    return predicted_ranking

### Function to calculate NDCG score

In [11]:
def calculate_ndcg_score(df, df_type, model, use_cosine = True, gloss_cache = None):
    model.eval()
    scores = []
    for index, row in df.iterrows():
        ideal_ranking = ast.literal_eval(row["expected_order"])

        if gloss_cache is None:
            text = row["sentence"]
        else:
            text = gloss_cache[row["sentence"]]
        image_to_image_paths = {}

        for image_name in ideal_ranking:
            image_to_image_paths[image_name] = os.path.join(df_type, 
                                                            row["compound"].replace("'s", "_s"), 
                                                            image_name)

        if use_cosine:
            predicted_ranking = get_predicted_ranking_cosine(model, image_to_image_paths, text)
        else:
            predicted_ranking = get_predicted_ranking_euclidean(model, image_to_image_paths, text)
            

        score = ndcg_score(ideal_ranking, predicted_ranking)
        scores.append(score)

    return sum(scores)/len(scores)

### Function to 1pc Accuracy

In [12]:
def calculate_1pc_accuracy(df, df_type, model, use_cosine = True, gloss_cache = None):
    model.eval()
    correct = 0

    for index, row in df.iterrows():
        ideal_ranking = ast.literal_eval(row["expected_order"])

        if gloss_cache is not None:
            text = gloss_cache[row["sentence"]]
        else:
            text = row["sentence"]
        image2image_paths = {}

        for image_name in ideal_ranking:
            image2image_paths[image_name] = os.path.join(df_type, 
                                                            row["compound"].replace("'s", "_s"), 
                                                            image_name)

        if use_cosine:
            predicted_ranking = get_predicted_ranking_cosine(model, image2image_paths, text)
        else:
            predicted_ranking = get_predicted_ranking_euclidean(model, image2image_paths, text)

        if ideal_ranking[0] == predicted_ranking[0]:
            correct += 1

    return round(correct / len(df), 2)

### Train model with rank 8 and triplet loss using cosine similarity

In [22]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 8
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model1 = get_peft_model(model, lora_config)
train(model1, train_dataset,model_name="rank8_cosine")

                                                                                                                                                                                                                                                                                                                              

Epoch 1 - Avg Loss: 0.2628 - Train NDCG: 0.9478 - Dev NDCG: 0.9199


                                                                                                                                                                                                                                                                                                                              

Epoch 2 - Avg Loss: 0.2435 - Train NDCG: 0.9661 - Dev NDCG: 0.9220


                                                                                                                                                                                                                                                                                                                           

Epoch 3 - Avg Loss: 0.2094 - Train NDCG: 0.9733 - Dev NDCG: 0.9234


                                                                                                                                                                                                                                                                                                                           

Epoch 4 - Avg Loss: 0.1460 - Train NDCG: 0.9766 - Dev NDCG: 0.9234


                                                                                                                                                                                                                                                                                                                           

Epoch 5 - Avg Loss: 0.0773 - Train NDCG: 0.9798 - Dev NDCG: 0.9247


                                                                                                                                                                                                                                                                                                                           

Epoch 6 - Avg Loss: 0.0364 - Train NDCG: 0.9789 - Dev NDCG: 0.9329


                                                                                                                                                                                                                                                                                                                           

Epoch 7 - Avg Loss: 0.0152 - Train NDCG: 0.9780 - Dev NDCG: 0.9319


                                                                                                                                                                                                                                                                                                                           

Epoch 8 - Avg Loss: 0.0068 - Train NDCG: 0.9757 - Dev NDCG: 0.9214


                                                                                                                                                                                                                                                                                                                           

Epoch 9 - Avg Loss: 0.0030 - Train NDCG: 0.9774 - Dev NDCG: 0.9213
Early stopping at epoch 9 (no improvement in 3 epochs)


'rank8_cosine_best.pt'

In [23]:
acc = calculate_1pc_accuracy(dev_df, "dev", model1, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

Dev 1pc Accuracy: 67.0


In [24]:
acc = calculate_1pc_accuracy(test_df, "test", model1, gloss_cache = gloss_cache_test)
print(f"Test 1pc Accuracy: {acc*100}")

Test 1pc Accuracy: 27.0


### Train model with rank 8 and triplet loss using euclidean distance

In [429]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 8
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model2 = get_peft_model(model, lora_config)
train(model2, train_dataset, use_cosine = False)

                                                                                

Epoch 1 - Avg Loss: 0.2720 - Train NDCG: 0.9400 - Dev NDCG: 0.9391


                                                                                

Epoch 2 - Avg Loss: 0.2546 - Train NDCG: 0.9559 - Dev NDCG: 0.9433


                                                                                

Epoch 3 - Avg Loss: 0.2279 - Train NDCG: 0.9724 - Dev NDCG: 0.9480


                                                                                

Epoch 4 - Avg Loss: 0.1804 - Train NDCG: 0.9765 - Dev NDCG: 0.9515


                                                                                

Epoch 5 - Avg Loss: 0.1159 - Train NDCG: 0.9818 - Dev NDCG: 0.9530


                                                                                

Epoch 6 - Avg Loss: 0.0578 - Train NDCG: 0.9785 - Dev NDCG: 0.9579


                                                                                

Epoch 7 - Avg Loss: 0.0250 - Train NDCG: 0.9781 - Dev NDCG: 0.9542


                                                                                

Epoch 8 - Avg Loss: 0.0091 - Train NDCG: 0.9775 - Dev NDCG: 0.9468


                                                                                

Epoch 9 - Avg Loss: 0.0043 - Train NDCG: 0.9768 - Dev NDCG: 0.9526


                                                                                

Epoch 10 - Avg Loss: 0.0024 - Train NDCG: 0.9738 - Dev NDCG: 0.9407


                                                                                

Epoch 11 - Avg Loss: 0.0017 - Train NDCG: 0.9778 - Dev NDCG: 0.9517


                                                                                

Epoch 12 - Avg Loss: 0.0012 - Train NDCG: 0.9761 - Dev NDCG: 0.9453


                                                                                

Epoch 13 - Avg Loss: 0.0010 - Train NDCG: 0.9766 - Dev NDCG: 0.9453


                                                                                

Epoch 14 - Avg Loss: 0.0010 - Train NDCG: 0.9769 - Dev NDCG: 0.9463


                                                                                

Epoch 15 - Avg Loss: 0.0010 - Train NDCG: 0.9771 - Dev NDCG: 0.9459


                                                                                

Epoch 16 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 17 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 18 - Avg Loss: 0.0021 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 19 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 20 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 21 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 22 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 23 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 24 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


                                                                                

Epoch 25 - Avg Loss: 0.0010 - Train NDCG: 0.9770 - Dev NDCG: 0.9459


In [431]:
acc = calculate_1pc_accuracy(dev_df, "dev", model2, use_cosine = False, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

Dev 1pc Accuracy: 67.0


In [None]:
acc = calculate_1pc_accuracy(test_df, "test", model2, use_cosine = False, gloss_cache = gloss_cache_test)
print(f"Test 1pc Accuracy: {acc*100}")

### Train model with rank 4 and triplet loss using cosine

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 4
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model3 = get_peft_model(model, lora_config)
train(model3, train_dataset, use_cosine = True)

In [None]:
acc = calculate_1pc_accuracy(dev_df, "dev", model3, use_cosine = False, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

In [None]:
acc = calculate_1pc_accuracy(test_df, "test", model3, use_cosine = False, gloss_cache = gloss_cache_test)
print(f"Test 1pc Accuracy: {acc*100}")

### Train model with rank 4 and triplet loss using euclidean distance

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 4
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model4 = get_peft_model(model, lora_config)
train(model4, train_dataset, use_cosine = False)

In [None]:
acc = calculate_1pc_accuracy(dev_df, "dev", model4, use_cosine = False, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

In [None]:
acc = calculate_1pc_accuracy(test_df, "test", model4, use_cosine = False, gloss_cache = gloss_cache_test)
print(f"Test 1pc Accuracy: {acc*100}")

### Train model with rank 2 and triplet loss using cosine

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 16
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model5 = get_peft_model(model, lora_config)
train(model5, train_dataset, use_cosine = False)

In [None]:
acc = calculate_1pc_accuracy(dev_df, "dev", model5, use_cosine = False, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

### Train model with rank 2 and triplet loss using Euclidean

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
rank = 16
lora_config = LoraConfig(
    r=rank,
    lora_alpha=2*rank,
    target_modules=["q_proj", "v_proj"]
)
train_dataset = TripletDataset(train_df, gloss_cache_train)
model6 = get_peft_model(model, lora_config)
train(model6, train_dataset, use_cosine = False)

In [None]:
acc = calculate_1pc_accuracy(dev_df, "dev", model6, use_cosine = False, gloss_cache = gloss_cache_dev)
print(f"Dev 1pc Accuracy: {acc*100}")

In [None]:
acc = calculate_1pc_accuracy(test_df, "test", model6, use_cosine = False, gloss_cache = gloss_cache_test)
print(f"Test 1pc Accuracy: {acc*100}")