<h2 align='center'>Preparing the dataset<h2>

In [43]:
import pandas as pd
import numpy as np
import torch

In [2]:
device = "cuda" if torch.cuda.is_available() else \
         ("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

cuda


In [3]:
#full dataset reading
# df=pd.read_csv('text_dataset.tsv',sep='\t')
# import pandas as pd
# from sklearn.model_selection import train_test_split

# train_temp, test_temp = train_test_split(df, test_size=0.2, stratify=df['6_way_label'] ,random_state=42)

# test, val = train_test_split(test_temp, test_size=0.5,stratify=test_temp['6_way_label'] , random_state=42)

df_train = pd.read_csv('text_dataset_train.tsv', sep='\t')
df_test = pd.read_csv('text_dataset_test.tsv', sep='\t')
df_val = pd.read_csv('text_dataset_val.tsv', sep='\t')

print("Training set size:", len(df_train))
print("Test set size:", len(df_test))
print("Validation set size:", len(df_val))

Training set size: 43793
Test set size: 5475
Validation set size: 5474


In [4]:
multi_traindata = df_train
multi_validata = df_test
multi_testdata = df_val

In [5]:
train_data = multi_traindata[multi_traindata['clean_title'].notna()]

valid_data = multi_validata[multi_validata['clean_title'].notna()]

test_data = multi_testdata[multi_testdata['clean_title'].notna()]

In [6]:
train_news, train_labels, train_images, train_id = train_data ['clean_title'].tolist(), train_data['6_way_label'].tolist(), train_data['id'].tolist(), train_data['id'].tolist()
valid_news, valid_labels, valid_images, valid_id = valid_data['clean_title'].tolist(), valid_data['6_way_label'].tolist(), valid_data['id'].tolist(), valid_data['id'].tolist()
test_news, test_labels, test_images, test_id = test_data['clean_title'].tolist(), test_data['6_way_label'].tolist(), test_data['id'].tolist(), test_data['id'].tolist()

In [7]:
def add_suffix(images, suffix='.jpg'):
    return ['image_dataset/'+ image + suffix for image in images]

train_images_final = add_suffix(train_images)
valid_images_final = add_suffix(valid_images)
test_images_final = add_suffix(test_images)

In [8]:
from transformers import CLIPTokenizerFast, CLIPProcessor, CLIPModel
import torch

# if you have CUDA or MPS, set it to the active device like this

model_id = "openai/clip-vit-base-patch32"

# we initialize a tokenizer and image processor, and the model itself
tokenizer = CLIPTokenizerFast.from_pretrained(model_id)
processor = CLIPProcessor.from_pretrained(model_id)
model = CLIPModel.from_pretrained(model_id).to(device)

<h2 align='center'>Forming the text and image embeddings<h2>

In [9]:
import pandas as pd
from PIL import Image

output_file = "text_image_embeddings_train.csv"  
batch_size = 1  

embeddings_df = pd.DataFrame()

for i in range(0, len(train_news), batch_size):
    batch_ids = train_id[i:i+batch_size]
    batch_texts = train_news[i:i+batch_size]
    batch_labels = train_labels[i:i+batch_size]  
    batch_inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True)
    batch_inputs = {k: v.to(device) for k, v in batch_inputs.items()}
    batch_text_emb = model.get_text_features(**batch_inputs)
    batch_text_emb = batch_text_emb.detach().cpu().numpy()
    
    temp_df = pd.DataFrame({
        'id': batch_ids,
        'label': batch_labels,
        'text_embedding': ['\t'.join(map(str, emb)) for emb in batch_text_emb]
    })
    
    for image_path in train_images_final[i:i+batch_size]:
        image = processor(
            text=None,
            images=Image.open(image_path),
            return_tensors='pt'
        )['pixel_values'].to(device)
        img_emb = model.get_image_features(image)
        img_emb = img_emb.detach().cpu().numpy().reshape(1, -1)  
        
        temp_df['image_embedding'] = ['\t'.join(map(str, emb)) for emb in img_emb]
    
    embeddings_df = pd.concat([embeddings_df, temp_df], ignore_index=True)
    
embeddings_df.to_csv(output_file, index=False)




In [10]:
import pandas as pd
from PIL import Image

output_file = "text_image_embeddings_val.csv"  
batch_size = 1  

embeddings_df = pd.DataFrame()

for i in range(0, len(valid_news), batch_size):
    batch_ids = valid_id[i:i+batch_size]
    batch_texts = valid_news[i:i+batch_size]
    batch_labels = valid_labels[i:i+batch_size]  
    batch_inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True)
    batch_inputs = {k: v.to(device) for k, v in batch_inputs.items()}
    batch_text_emb = model.get_text_features(**batch_inputs)
    batch_text_emb = batch_text_emb.detach().cpu().numpy()
    
    temp_df = pd.DataFrame({
        'id': batch_ids,
        'label': batch_labels,
        'text_embedding': ['\t'.join(map(str, emb)) for emb in batch_text_emb]
    })
    
    for image_path in valid_images_final[i:i+batch_size]:
        image = processor(
            text=None,
            images=Image.open(image_path),
            return_tensors='pt'
        )['pixel_values'].to(device)
        img_emb = model.get_image_features(image)
        img_emb = img_emb.detach().cpu().numpy().reshape(1, -1)  
        
        temp_df['image_embedding'] = ['\t'.join(map(str, emb)) for emb in img_emb]
    
    embeddings_df = pd.concat([embeddings_df, temp_df], ignore_index=True)
    
embeddings_df.to_csv(output_file, index=False)




In [11]:
import pandas as pd
from PIL import Image

output_file = "text_image_embeddings_test.csv" 
batch_size = 1  

embeddings_df = pd.DataFrame()

for i in range(0, len(test_news), batch_size):
    batch_ids = test_id[i:i+batch_size]
    batch_texts = test_news[i:i+batch_size]
    batch_labels = test_labels[i:i+batch_size]  
    batch_inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True)
    batch_inputs = {k: v.to(device) for k, v in batch_inputs.items()}
    batch_text_emb = model.get_text_features(**batch_inputs)
    batch_text_emb = batch_text_emb.detach().cpu().numpy()
    
    temp_df = pd.DataFrame({
        'id': batch_ids,
        'label': batch_labels,
        'text_embedding': ['\t'.join(map(str, emb)) for emb in batch_text_emb]
    })
    
    for image_path in test_images_final[i:i+batch_size]:
        image = processor(
            text=None,
            images=Image.open(image_path),
            return_tensors='pt'
        )['pixel_values'].to(device)
        img_emb = model.get_image_features(image)
        img_emb = img_emb.detach().cpu().numpy().reshape(1, -1)  
        
        temp_df['image_embedding'] = ['\t'.join(map(str, emb)) for emb in img_emb]
    
    embeddings_df = pd.concat([embeddings_df, temp_df], ignore_index=True)
    
embeddings_df.to_csv(output_file, index=False)




<h2 align='center'>Defining the Model<h2>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [13]:
class MyDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.text_embeddings = self.data['text_embedding'].apply(lambda x: list(map(float, x.split('\t')))).tolist()
        self.image_embeddings = self.data['image_embedding'].apply(lambda x: list(map(float, x.split('\t')))).tolist()
        self.labels = self.data['label'].tolist()
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        text_emb = torch.tensor(self.text_embeddings[index])
        img_emb = torch.tensor(self.image_embeddings[index])
        label = torch.tensor(self.labels[index])
        return text_emb, img_emb, label

In [14]:
train_file = 'text_image_embeddings_train.csv'
val_file = 'text_image_embeddings_val.csv'
test_file = 'text_image_embeddings_test.csv'

train_data = MyDataset(train_file)
val_data = MyDataset(val_file)
test_data = MyDataset(test_file)

In [15]:
class ClassificationModel(nn.Module):
    def __init__(self, input_size):
        super(ClassificationModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 6) 
    
    def forward(self, text_emb, img_emb):
        x = torch.cat((text_emb, img_emb), dim=1)
        x = self.fc1(x)
        x = nn.ReLU()(x)
        x = self.fc2(x)
        x = nn.ReLU()(x)
        x = self.fc3(x)
        return x

model = ClassificationModel(input_size=512*2)  # fusion

<h2 align='center'>Finding the Accuracy<h2>

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)
criterion.to(device)

num_epochs = 10

for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct = 0.0
    model.train()
    
    for text_emb, img_emb, labels in train_loader:
        text_emb = text_emb.to(device)
        img_emb = img_emb.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(text_emb, img_emb)
        _, predicted = torch.max(outputs.data, 1)
        
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        train_correct += (predicted == labels).sum().item()
    
    train_accuracy = train_correct / len(train_data)
    train_loss /= len(train_loader)
    
    model.eval()
    val_loss = 0.0
    val_correct = 0.0
    
    with torch.no_grad():
        for text_emb, img_emb, labels in val_loader:
            text_emb = text_emb.to(device)
            img_emb = img_emb.to(device)
            labels = labels.to(device)
            
            outputs = model(text_emb, img_emb)
            _, predicted = torch.max(outputs.data, 1)
            
            loss = criterion(outputs, labels.long())
            
            val_loss += loss.item()
            val_correct += (predicted == labels).sum().item()
    
    val_accuracy = val_correct / len(val_data)
    val_loss /= len(val_loader)
    
    print(f'Epoch: {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

Epoch: 1/10, Train Loss: 0.5518, Train Accuracy: 0.7988, Val Loss: 0.4790, Val Accuracy: 0.8232
Epoch: 2/10, Train Loss: 0.3675, Train Accuracy: 0.8660, Val Loss: 0.4566, Val Accuracy: 0.8378
Epoch: 3/10, Train Loss: 0.2588, Train Accuracy: 0.9070, Val Loss: 0.4757, Val Accuracy: 0.8340
Epoch: 4/10, Train Loss: 0.1608, Train Accuracy: 0.9430, Val Loss: 0.5982, Val Accuracy: 0.8298
Epoch: 5/10, Train Loss: 0.0921, Train Accuracy: 0.9670, Val Loss: 0.7294, Val Accuracy: 0.8283
Epoch: 6/10, Train Loss: 0.0637, Train Accuracy: 0.9774, Val Loss: 0.8395, Val Accuracy: 0.8221
Epoch: 7/10, Train Loss: 0.0498, Train Accuracy: 0.9829, Val Loss: 0.9514, Val Accuracy: 0.8216
Epoch: 8/10, Train Loss: 0.0476, Train Accuracy: 0.9830, Val Loss: 0.9711, Val Accuracy: 0.8247
Epoch: 9/10, Train Loss: 0.0375, Train Accuracy: 0.9870, Val Loss: 1.0027, Val Accuracy: 0.8296
Epoch: 10/10, Train Loss: 0.0375, Train Accuracy: 0.9873, Val Loss: 1.0839, Val Accuracy: 0.8241


In [17]:
model.eval()
test_correct = 0.0

with torch.no_grad():
    for text_emb, img_emb, labels in test_loader:
        text_emb = text_emb.to(device)
        img_emb = img_emb.to(device)
        labels = labels.to(device)
        
        outputs = model(text_emb, img_emb)
        _, predicted = torch.max(outputs.data, 1)
        
        test_correct += (predicted == labels).sum().item()

test_accuracy = test_correct / len(test_data)
print(f'Test Accuracy: {test_accuracy:.4f}')

Test Accuracy: 0.8288


<h2 align='center'>Making a Predictive System<h2>

In [27]:
import random

model.eval()

sample_indices = random.sample(range(len(test_data)), k=8)
sample_data = [test_data[i] for i in sample_indices]

for idx, (text_emb, img_emb, labels) in zip(sample_indices, sample_data):
    text_emb = text_emb.to(device)
    img_emb = img_emb.to(device)
    labels = labels.to(device)

    output = model(text_emb.unsqueeze(0), img_emb.unsqueeze(0))
    _, predicted = torch.max(output, 1)

    predicted_label = predicted.item()
    actual_label = labels.item()

    entry_id = test_data.data['id'][idx]

    print(f"id: {entry_id}\nPredicted: {predicted_label}, Actual: {actual_label}")
    print("")

id: 9ij982
Predicted: 1, Actual: 1

id: 69a8z8
Predicted: 5, Actual: 5

id: 85mi39
Predicted: 3, Actual: 3

id: 3yld8t
Predicted: 3, Actual: 3

id: 3bp10f
Predicted: 0, Actual: 3

id: 6cmjvp
Predicted: 3, Actual: 3

id: 713udl
Predicted: 1, Actual: 1

id: 7uafve
Predicted: 3, Actual: 3



<h2 align='center'>Saving the Model<h2>

In [26]:
model_path = "week5.pth"

torch.save(model.state_dict(), model_path)

print("Model saved successfully!")

Model saved successfully!
