In [None]:
# Install the third-party packages that the import cell below relies on
# (transformers, torch, scikit-learn) plus tqdm for the training progress bar.
# NOTE(review): versions are unpinned, so results may differ between runs on
# different dates - consider pinning once a known-good set is identified.
!pip install transformers torch scikit-learn
!pip install tqdm


In [None]:
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW


In [None]:
# Bring the CSV into the Colab runtime with the upload helper, then parse it.
from google.colab import files

files.upload()
df = pd.read_csv("tweet_emotions.csv")

# The "content" column is the model input; the "sentiment" column is the target.
texts, labels = df["content"], df["sentiment"]

# Quick sanity check: first rows followed by the frame's (rows, columns).
print(df.head(), df.shape, sep="\n")



In [None]:
# Turn the sentiment values into integer class ids; the fitted encoder is kept
# so that ids can be mapped back to their original values later.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# One output unit per distinct class seen during fitting.
num_labels = label_encoder.classes_.shape[0]
print("Number of emotions:", num_labels)


In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# encoded labels and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels_encoded, test_size=0.2, stratify=labels_encoded, random_state=42
)


In [None]:
# Load the pretrained "bert-base-uncased" tokenizer. EmotionDataset (defined
# below) reads this module-level name, so this line must run before any
# dataset is constructed.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

class EmotionDataset(Dataset):
    """Tokenized texts paired with integer class labels, served as tensors.

    All texts are tokenized once at construction time; ``__getitem__`` only
    converts the already-encoded row to tensors.

    Args:
        texts: Sequence of strings. Anything exposing ``tolist()`` (pandas
            Series, NumPy array) or any plain iterable such as a list.
        labels: Integer class ids aligned positionally with ``texts``.
        text_tokenizer: Callable used to tokenize the texts. When omitted, the
            module-level ``tokenizer`` is used.
        max_length: Truncation limit forwarded to the tokenizer.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, text_tokenizer=None, max_length=128):
        # The global is only looked up when no tokenizer is passed in, so the
        # existing two-argument call sites keep working unchanged.
        active_tokenizer = tokenizer if text_tokenizer is None else text_tokenizer

        text_list = texts.tolist() if hasattr(texts, "tolist") else list(texts)

        # A pandas Series keeps its original row index after a split, so
        # ``labels[idx]`` would be a label-based lookup and fail or return the
        # wrong row. Converting to an array makes indexing purely positional.
        self.labels = np.asarray(labels)

        if len(text_list) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(text_list)} and {len(self.labels)})"
            )

        self.encodings = active_tokenizer(
            text_list,
            truncation=True,
            padding=True,
            max_length=max_length,
        )

    def __len__(self):
        """Return the number of examples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, including a ``"labels"`` entry."""
        item = {key: torch.tensor(values[idx]) for key, values in self.encodings.items()}
        # Class ids are forced to int64, the dtype the classification loss expects.
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item


In [None]:
# Wrap each split in an EmotionDataset (tokenization happens inside the constructor).
train_dataset, test_dataset = EmotionDataset(X_train, y_train), EmotionDataset(X_test, y_test)

# Batches of 16 for both splits; only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)


In [None]:
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained "bert-base-uncased" with a classification head of num_labels outputs.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
model.to(device)

# AdamW over every model parameter with a learning rate of 2e-5.
optimizer = AdamW(params=model.parameters(), lr=2e-5)


In [None]:
# tqdm.auto picks the notebook widget when one is available and falls back to
# the plain console bar otherwise.
from tqdm.auto import tqdm

epochs = 2

# Fine-tune for a fixed number of passes over train_loader, reporting the mean
# batch loss at the end of every epoch.
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

    for batch in progress_bar:
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels_batch = batch["labels"].to(device)

        # Passing labels makes the model compute the loss itself.
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels_batch
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the running total and the
        # progress bar.
        batch_loss = loss.item()
        total_loss += batch_loss

        # Show the latest batch loss next to the progress bar.
        progress_bar.set_postfix(loss=batch_loss)

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}/{epochs} - Avg Loss: {avg_loss:.4f}")


In [None]:
# Score the fine-tuned model on the held-out split.
model.eval()
predictions, true_labels = [], []

# No gradients are needed for evaluation.
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Highest-scoring class per row; moved to the CPU for scikit-learn.
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        predictions.extend(preds)
        true_labels.extend(batch["labels"].numpy())

accuracy = accuracy_score(true_labels, predictions)
print("BERT Accuracy:", accuracy)

print("\nClassification Report:\n")
print(
    classification_report(
        true_labels,
        predictions,
        # Listing every class id keeps target_names aligned even when a class
        # is absent from both the true and the predicted labels; without it
        # scikit-learn raises on the length mismatch.
        labels=np.arange(len(label_encoder.classes_)),
        target_names=label_encoder.classes_,
        # Classes that are never predicted report 0 instead of emitting a
        # warning for each undefined metric.
        zero_division=0,
    )
)


In [None]:
# Record the class names in the model config before saving, so the saved
# directory carries the id <-> emotion mapping and does not depend on the
# in-memory LabelEncoder to interpret predictions.
id2label = {idx: str(name) for idx, name in enumerate(label_encoder.classes_)}
model.config.id2label = id2label
model.config.label2id = {name: idx for idx, name in id2label.items()}

# Model weights/config and tokenizer files go into the same directory.
model.save_pretrained("bert_emotion_model")
tokenizer.save_pretrained("bert_emotion_model")

print("Model saved successfully!")


In [None]:
# Rebuild the tokenizer and the model from the "bert_emotion_model" directory,
# placing the model on the active device.
tokenizer = BertTokenizer.from_pretrained("bert_emotion_model")
model = BertForSequenceClassification.from_pretrained("bert_emotion_model").to(device)

# Switch to evaluation mode before running inference.
model.eval()


In [None]:
# Classify a single hand-written sentence with the reloaded model.
text = ["I am extremely happy and excited today"]

# Tokenize to PyTorch tensors, then move them to the same device as the model.
inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
inputs = inputs.to(device)

with torch.no_grad():
    outputs = model(**inputs)
    # Index of the highest logit per input row.
    prediction = outputs.logits.argmax(dim=1).cpu().numpy()

# Translate the class id back to its original label value.
print("Predicted Emotion:", label_encoder.inverse_transform(prediction))
