In [5]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import re
import numpy as np

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
data = pd.read_csv("merged_training.csv")
label_mapping = {"joy": 0, "sadness": 1, "anger": 2, "fear": 3, "love": 4, "surprise": 5}
data["label_encoded"] = data["label"].map(label_mapping)
def tokenizer(text):
    text = re.sub(r"[^\w\s]", "", text.lower())
    return text.split()
vocab = {"<pad>": 0, "<UNK>": 1}
for sentence in data["text"].values:
    for word in tokenizer(sentence):
        if word not in vocab:
            vocab[word] = len(vocab)
def encode(sentence):
    return [vocab.get(word, vocab["<UNK>"]) for word in tokenizer(sentence)]

In [8]:
class TextDataset(Dataset):
    def __init__(self, data):
        self.texts = [torch.tensor(encode(sentence), dtype=torch.long) for sentence in data["text"]]
        self.labels = torch.tensor(data["label_encoded"].values, dtype=torch.int64)
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]
dataset = TextDataset(data)
dataloader = DataLoader(dataset=dataset, batch_size=32, collate_fn=lambda x: tuple(zip(*x)))

In [9]:
class TextClassifier(nn.Module):
    def __init__(self, vocab_size, input_dim = 16, output_dim = 32, classes = 6):
        super(TextClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, input_dim)
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.fc2 = nn.Linear(output_dim, classes)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.embedding(x).mean(dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [10]:
vocab_size = len(vocab)
model = TextClassifier(vocab_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 3
for epoch in range(num_epochs):
    total_loss = 0.0
    correct = 0
    total_samples = 0
    for sentences, labels in dataloader:
        sentences = torch.nn.utils.rnn.pad_sequence(sentences, batch_first=True).to(device)
        labels = torch.tensor(labels, dtype=torch.int64).to(device)
        optimizer.zero_grad()
        outputs = model(sentences).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        predictions = torch.argmax(outputs, dim=1)
        correct += torch.eq(predictions, labels).sum().item()
        total_samples += labels.size(0)
    accuracy = (correct / total_samples) * 100
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/3, Loss: 0.3188, Accuracy: 87.02%
Epoch 2/3, Loss: 0.1809, Accuracy: 90.79%
Epoch 3/3, Loss: 0.1634, Accuracy: 91.41%


In [11]:
def predict(text, model, vocab, device):
    model.eval()
    tokens = [vocab.get(word, vocab["<UNK>"]) for word in tokenizer(text)]
    input_tensor = torch.tensor(tokens, dtype=torch.long).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(input_tensor)
    predicted_label = torch.argmax(output, dim=1).item()
    return predicted_label
label_arr = ["joy", "sadness", "anger", "fear", "love", "surprise"]
test_sentences = [
    # Joy (Happy)
    "Today is such a wonderful day!", 
    "I feel so happy to see my old friend again.", 
    "Winning this competition makes me ecstatic!", 
    "This food is so delicious, I love it!", 
    "I received a surprise gift, I'm so happy!", 

    # Sadness (Sad)
    "I feel so lonely and lost.", 
    "Today is a terrible day for me.", 
    "I miss the old days, it makes me sad.", 
    "It's heartbreaking to say goodbye to someone you love.", 
    "I failed my exam, I'm really upset.", 

    # Anger (Mad)
    "I'm so angry that I was lied to!", 
    "Stop bothering me!", 
    "I can't stand this unfairness anymore.", 
    "Why would you do that? I'm really mad!", 
    "I've warned them, but they still won't listen!", 

    # Fear (Scared)
    "I'm nervous about my exam tomorrow.", 
    "It's so dark in here, I'm scared.", 
    "I have no idea what to do in this situation.", 
    "That sound was really terrifying!", 
    "I'm afraid of losing my loved ones."
]
dem = 0
for text in test_sentences:
    dem+=1
    pred = predict(text, model, vocab, device)
    print(label_arr[pred])
    if (dem == 5):
        print("----")
        dem = 0

joy
joy
joy
joy
joy
----
sadness
sadness
sadness
fear
sadness
----
anger
joy
anger
anger
sadness
----
fear
fear
joy
fear
love
----
