In [1]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-uqb54i_d
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-uqb54i_d
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting ftfy (from clip==1.0)
  Downloading ftfy-6.2.3-py3-none-any.whl.metadata (7.8 kB)
Downloading ftfy-6.2.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.0/43.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25ldone
[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=1dd4490d9e29773eb77eb9cbdc39b10b908d465df1b35e3040cfafc95b73b86c
  Stored in directory: /tmp/pip-ephem-wheel-cache-el01q93d/wheels/da/2b/4c/d6691fa9597aac8bb

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import clip
from PIL import Image
import requests
import pandas as pd
import ast
from tqdm import tqdm

In [3]:
united_divided_df = pd.read_csv('/kaggle/input/united-divided-dataset/united_divided.csv')
united_divided_df['logits'] = united_divided_df['logits'].apply(ast.literal_eval)
united_divided_df.head(1)

Unnamed: 0,image_link,temperature,description,logits,hallucinations,hedges,probe_1,label_1,pred_1,context_1,...,probe_3,label_3,pred_3,context_3,probe_4,label_4,pred_4,context_4,group_num,split
0,https://cdn.pixabay.com/photo/2020/10/03/11/08...,0.8,The image features a woman holding out her ha...,"[( The, {'The': 0.6002, 'In': 0.3314, 'A': 0.0...",The image features a woman holding out her han...,The image features a woman holding out her han...,There is handbag.,False,0.527317,The image features a woman holding out her ha...,...,The dog is white.,False,0.507812,The image features a woman holding out her ha...,The dog is far from the woman.,False,0.766294,The image features a woman holding out her ha...,5,train


In [4]:
probe_cols = [f'probe_{i}' for i in range(1,5)]
label_cols = [f'label_{i}' for i in range(1,5)]
link_desc_df = united_divided_df[['image_link', 'split',*probe_cols, *label_cols]]

In [5]:
link_desc_df['split'].value_counts()

split
train    175
test     123
dev       52
Name: count, dtype: int64

In [6]:
link_desc_df.head(1)

Unnamed: 0,image_link,split,probe_1,probe_2,probe_3,probe_4,label_1,label_2,label_3,label_4
0,https://cdn.pixabay.com/photo/2020/10/03/11/08...,train,There is handbag.,There dog looks eager to jump on the woman.,The dog is white.,The dog is far from the woman.,False,False,False,False


In [7]:
train_df = link_desc_df[link_desc_df['split'] == 'train']
test_df = link_desc_df[link_desc_df['split'] == 'test']
val_df = link_desc_df[link_desc_df['split'] == 'dev']

In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-L/14@336px", device=device)

100%|████████████████████████████████████████| 891M/891M [00:05<00:00, 175MiB/s]


In [15]:
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.df = dataframe
        self.cache = {}

    def __len__(self):
        return len(self.df) * 4

    def __getitem__(self, idx):
        img_url = self.df.iloc[idx//4, 0]
        probe = self.df.iloc[idx//4, 2 + idx%4]
        label = torch.tensor(self.df.iloc[idx//4, 6 + idx%4], dtype=torch.int)
        text = clip.tokenize(probe).to(device)
        with torch.no_grad():
            probe_embedding = model.encode_text(text)
        # Open image
        if self.cache.get(idx) is not None:
            image_embedding = self.cache[idx]
        else:
            image = preprocess(Image.open(requests.get(img_url, stream=True).raw)).unsqueeze(0).to(device)
            with torch.no_grad():
                image_embedding = model.encode_image(image)
            self.cache[idx] = image_embedding
        return image_embedding.squeeze().to(torch.float), probe_embedding.squeeze().to(torch.float), label.to(torch.float).to(device)

In [16]:
BATCH_SIZE = 4
EMBED_DIM = 768

train_dataset = CustomDataset(train_df)
test_dataset = CustomDataset(test_df)
val_dataset = CustomDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [22]:
class EmbeddingClassifier(nn.Module):
    def __init__(self):
        super(EmbeddingClassifier, self).__init__()
        # Define a fully connected layer to combine the two embeddings
        self.fc1 = nn.Linear(EMBED_DIM * 2, 512)  # 768 * 2 because we are concatenating two embeddings
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 1)  # Output layer
        
        # Activation functions
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, embed1, embed2):
        # Concatenate the two embeddings
        x = self.dropout1(torch.cat((embed1, embed2), dim=1))
        # Forward pass through the network
        x = self.dropout2(self.relu(self.fc1(x)))
        x = self.dropout2(self.relu(self.fc2(x)))
        x = self.fc3(x)
        
        # Apply sigmoid to output a probability
        x = self.sigmoid(x)
        
        return x
    
    def save_model(self, path):
        """Save the model state dictionary to the specified path."""
        torch.save(self.state_dict(), path)
        print(f"Model saved to {path}")

    def load_model(self, path):
        """Load the model state dictionary from the specified path."""
        self.load_state_dict(torch.load(path))
        self.eval()  # Set the model to evaluation mode
        print(f"Model loaded from {path}")

In [23]:
num_epochs = 50
LR = 1e-4

my_model = EmbeddingClassifier().to(device)
print(sum(p.numel() for p in my_model.parameters() if p.requires_grad))
optimizer = torch.optim.Adam(my_model.parameters(), lr=LR)

# Early stopping parameters
patience = 5  # Number of epochs to wait before stopping if no improvement
best_val_loss = float('inf')  # Initialize to infinity
counter = 0  # Counter for early stopping

852737


In [24]:
# def f1_loss(y_pred, y_true):
#     tp = torch.sum((y_true * y_pred).float(), dim=0)
#     tn = torch.sum(((1 - y_true) * (1 - y_pred)).float(), dim=0)
#     fp = torch.sum(((1 - y_true) * y_pred).float(), dim=0)
#     fn = torch.sum((y_true * (1 - y_pred)).float(), dim=0)

#     p = tp / (tp + fp + 1e-7)
#     r = tp / (tp + fn + 1e-7)

#     f1 = 2 * p * r / (p + r + 1e-7)
#     f1 = torch.where(torch.isnan(f1), torch.zeros_like(f1), f1)
#     return 1 - torch.mean(f1)

criterion = nn.BCELoss()

In [25]:
# Training loop
for epoch in range(num_epochs):
    my_model.train()  # Set model to training mode
    train_loss = 0.0

    for img_embed, desc_embed, label in tqdm(train_loader):
        optimizer.zero_grad()  # Zero the parameter gradients
        # Forward pass
        output = my_model(img_embed, desc_embed)
        loss = criterion(output, label.unsqueeze(1))
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * img_embed.size(0)  # Accumulate train loss

    # Calculate average train loss
    avg_train_loss = train_loss / len(train_loader.dataset)
    
    # Validation step
    my_model.eval()  # Set model to evaluation mode
    val_loss = 0.0

    with torch.no_grad():
        for img_embed, desc_embed, label in tqdm(val_loader):
            output = my_model(img_embed, desc_embed)
            loss = criterion(output, label.unsqueeze(1))
            val_loss += loss.item() * img_embed.size(0)  # Accumulate validation loss

    # Calculate average validation loss
    avg_val_loss = val_loss / len(val_loader.dataset)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    # Check if the validation loss improved
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        my_model.save_model('best_model.pth')
        print(f"Validation loss improved, saving model to best_model.pth")
    else:
        counter += 1
        print(f"Validation loss did not improve for {counter} epochs")

    # Early stopping
    if counter >= patience:
        print("Early stopping triggered")
        break

  label = torch.tensor(self.df.iloc[idx//4, 6 + idx%4], dtype=torch.int)
100%|██████████| 175/175 [00:06<00:00, 27.95it/s]
100%|██████████| 52/52 [00:01<00:00, 27.95it/s]


Epoch [1/50], Train Loss: 0.6935, Val Loss: 0.6877
Model saved to best_model.pth
Validation loss improved, saving model to best_model.pth


100%|██████████| 175/175 [00:06<00:00, 27.81it/s]
100%|██████████| 52/52 [00:01<00:00, 28.87it/s]


Epoch [2/50], Train Loss: 0.6887, Val Loss: 0.6864
Model saved to best_model.pth
Validation loss improved, saving model to best_model.pth


100%|██████████| 175/175 [00:06<00:00, 27.92it/s]
100%|██████████| 52/52 [00:01<00:00, 29.00it/s]


Epoch [3/50], Train Loss: 0.6787, Val Loss: 0.6839
Model saved to best_model.pth
Validation loss improved, saving model to best_model.pth


100%|██████████| 175/175 [00:06<00:00, 27.14it/s]
100%|██████████| 52/52 [00:01<00:00, 28.84it/s]


Epoch [4/50], Train Loss: 0.6577, Val Loss: 0.6845
Validation loss did not improve for 1 epochs


100%|██████████| 175/175 [00:06<00:00, 27.88it/s]
100%|██████████| 52/52 [00:01<00:00, 28.82it/s]


Epoch [5/50], Train Loss: 0.6362, Val Loss: 0.6799
Model saved to best_model.pth
Validation loss improved, saving model to best_model.pth


100%|██████████| 175/175 [00:06<00:00, 27.84it/s]
100%|██████████| 52/52 [00:01<00:00, 28.93it/s]


Epoch [6/50], Train Loss: 0.6236, Val Loss: 0.6953
Validation loss did not improve for 1 epochs


100%|██████████| 175/175 [00:06<00:00, 28.07it/s]
100%|██████████| 52/52 [00:01<00:00, 28.23it/s]


Epoch [7/50], Train Loss: 0.5954, Val Loss: 0.6900
Validation loss did not improve for 2 epochs


100%|██████████| 175/175 [00:06<00:00, 27.61it/s]
100%|██████████| 52/52 [00:01<00:00, 29.21it/s]


Epoch [8/50], Train Loss: 0.5692, Val Loss: 0.7010
Validation loss did not improve for 3 epochs


100%|██████████| 175/175 [00:06<00:00, 27.59it/s]
100%|██████████| 52/52 [00:01<00:00, 28.94it/s]


Epoch [9/50], Train Loss: 0.5632, Val Loss: 0.7087
Validation loss did not improve for 4 epochs


100%|██████████| 175/175 [00:06<00:00, 27.83it/s]
100%|██████████| 52/52 [00:01<00:00, 29.09it/s]

Epoch [10/50], Train Loss: 0.5316, Val Loss: 0.7222
Validation loss did not improve for 5 epochs
Early stopping triggered



