In [1]:
!pip install -U \
    torch torchvision torchaudio \
    transformers datasets accelerate scikit-learn \
    pandas numpy matplotlib tqdm \
    --quiet

In [2]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pandas as pd
from tqdm import tqdm
import numpy as np

# Choose the compute device in order of preference:
# CUDA GPU first, then Apple-silicon MPS, and CPU as the last resort.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"üîπ Using device: {device}")


üîπ Using device: cuda


In [3]:
# üõ† Fix for RunPod fast download issue
!pip install hf_transfer --quiet

import os
# Turn off the hf_transfer download path through its environment switch.
# NOTE(review): the line above installs hf_transfer, yet this setting disables it,
# so the install appears unnecessary.
# NOTE(review): `datasets` and `transformers` were already imported in an earlier
# cell. If the Hub client reads this variable at import time, assigning it here
# has no effect within the same kernel -- confirm, and if so move this assignment
# ahead of the first Hugging Face import.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"  # disable fast mode to avoid errors

from datasets import load_dataset

# Load the GoEmotions dataset; the result is indexed by split name below.
goemotions = load_dataset("go_emotions")

# Show the loaded object, then the feature schema of the training split.
print(goemotions)
print("\nFeatures:", goemotions["train"].features)


DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})

Features: {'text': Value('string'), 'labels': List(ClassLabel(names=['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'])), 'id': Value('string')}


In [4]:
# Load the GoEmotions dataset (this repeats the load from the previous cell)
# and bind it to `goemotions` for the cells that follow.
goemotions = load_dataset("go_emotions")

# Print the dataset summary, then the feature schema of the training split.
print(goemotions)
print(f"\nFeatures: {goemotions['train'].features}")

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})

Features: {'text': Value('string'), 'labels': List(ClassLabel(names=['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'])), 'id': Value('string')}


In [5]:
# Materialise the validation and test splits as pandas DataFrames.
val_split_df = goemotions["validation"].to_pandas()
test_split_df = goemotions["test"].to_pandas()

# Training data = original train split followed by the validation split,
# re-indexed 0..N-1.
train_df = pd.concat(
    [goemotions["train"].to_pandas(), val_split_df],
    ignore_index=True,
)

# The held-out test split is used as the "validation" frame from here on.
# NOTE(review): later cells use this frame for early stopping, so model
# selection is tuned on test data; metrics on it are not an unbiased estimate.
val_df = test_split_df.copy()

print(f"‚úÖ Combined training set size: {len(train_df)}")
print(f"üß™ Validation (test) set size: {len(val_df)}")
print(f"Columns: {train_df.columns.tolist()}")

‚úÖ Combined training set size: 48836
üß™ Validation (test) set size: 5427
Columns: ['text', 'labels', 'id']


In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hub identifier of the checkpoint used as the starting point.
MODEL_NAME = "SamLowe/roberta-base-go_emotions"

# Tokenizer and classification model come from the same checkpoint;
# the model is moved to the selected device immediately after loading.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)

# Summarise what was loaded, as reported by the model's own config.
print(f"‚úÖ Loaded model: {MODEL_NAME}")
print(f"üì¶ Number of labels: {model.config.num_labels}")
print(f"‚öôÔ∏è Problem type: {model.config.problem_type}")

‚úÖ Loaded model: SamLowe/roberta-base-go_emotions
üì¶ Number of labels: 28
‚öôÔ∏è Problem type: multi_label_classification


In [7]:
# Quick sanity check of the evaluation frame: column names and the first two rows.
print(f"Columns in val_df: {val_df.columns.tolist()}")
print(val_df.head(n=2))

Columns in val_df: ['text', 'labels', 'id']
                                                text labels       id
0  I‚Äôm really sorry about your situation :( Altho...   [25]  eecwqtt
1    It's wonderful because it's awful. At not with.    [0]  ed5f85d


In [8]:
from tqdm.auto import tqdm
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ‚úÖ Ensure proper type for labels (handles list, numpy array, int)
# Rebind val_df to a fresh copy before the label columns are rewritten/added below.
val_df = val_df.copy()

def normalize_label(x):
    """Coerce one raw ``labels`` cell into a plain Python list of label ids.

    Args:
        x: A list, tuple, NumPy array, scalar label id, or a missing value
            (``None`` / NaN).

    Returns:
        list: ``x`` itself when it is already a list; the elements of an
        array or tuple as a list; ``[int(x)]`` for a scalar; and ``[]`` for a
        missing value, so that callers can detect and drop unlabeled rows
        instead of crashing.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, np.ndarray):
        # atleast_1d keeps a 0-d array from collapsing to a bare scalar in tolist().
        return np.atleast_1d(x).tolist()
    if isinstance(x, tuple):
        return list(x)
    if x is None:
        return []
    if isinstance(x, (float, np.floating)) and np.isnan(x):
        # NaN is how pandas represents a missing cell; treat it as "no label".
        return []
    return [int(x)]

# 1. Normalise every `labels` cell to a list.
# 2. Derive `main_label` as the first id in that list (None when the list is empty).
# 3. Drop rows without a main label and renumber the index.
val_df = (
    val_df
    .assign(labels=lambda df: df["labels"].apply(normalize_label))
    .assign(
        main_label=lambda df: df["labels"].apply(
            lambda lbls: lbls[0] if len(lbls) > 0 else None
        )
    )
    .dropna(subset=["main_label"])
    .reset_index(drop=True)
)

print(f"‚úÖ Using {len(val_df)} samples from test split for evaluation.")
print(f"Columns: {val_df.columns.tolist()}")
print(val_df.head(2))

# Parallel Python lists consumed by the inference loop and the metrics below.
texts = val_df["text"].astype(str).tolist()
labels = val_df["main_label"].astype(int).tolist()

# üßÆ Run inference
model.eval()
batch_size = 32
pred_ids = []

# Gradients are never needed here, so the whole loop runs under no_grad.
with torch.no_grad():
    for i in tqdm(range(0, len(texts), batch_size), desc="Evaluating pre-trained model"):
        batch_texts = texts[i:i + batch_size]

        # Pad to the longest text in this batch; truncate anything over 256 tokens.
        enc = tokenizer(
            batch_texts,
            padding=True,
            truncation=True,
            max_length=256,
            return_tensors="pt",
        ).to(device)

        # Keep only the highest-scoring class index per text.
        logits = model(**enc).logits
        preds = logits.argmax(dim=-1)
        pred_ids += preds.cpu().tolist()

# ‚úÖ Evaluation metrics
# Align labels and predictions on their common prefix, so a partially
# completed inference loop can still be scored.
n_eval = min(len(labels), len(pred_ids))
y_true = np.array(labels[:n_eval])
y_pred = np.array(pred_ids[:n_eval])

# Shared options for the support-weighted precision / recall / F1 scores.
weighted = {"average": "weighted", "zero_division": 0}

print(f"\nüßÆ Evaluation on {len(y_true)} samples\n")
print("Accuracy :", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred, **weighted))
print("Recall   :", recall_score(y_true, y_pred, **weighted))
print("F1 Score :", f1_score(y_true, y_pred, **weighted))

print("\nDetailed Classification Report:\n")
# Class ids in numeric order, and their names in that same order.
label_ids = sorted(model.config.id2label.keys(), key=int)
ordered_labels = [model.config.id2label[i] for i in label_ids]
print(classification_report(
    y_true,
    y_pred,
    labels=label_ids,
    target_names=ordered_labels,
    zero_division=0,
))

‚úÖ Using 5427 samples from test split for evaluation.
Columns: ['text', 'labels', 'id', 'main_label']
                                                text labels       id  \
0  I‚Äôm really sorry about your situation :( Altho...   [25]  eecwqtt   
1    It's wonderful because it's awful. At not with.    [0]  ed5f85d   

   main_label  
0          25  
1           0  


Evaluating pre-trained model:   0%|          | 0/170 [00:00<?, ?it/s]


üßÆ Evaluation on 5427 samples

Accuracy : 0.5741662060070021
Precision: 0.5649094578775328
Recall   : 0.5741662060070021
F1 Score : 0.5630038779860694

Detailed Classification Report:

                precision    recall  f1-score   support

    admiration       0.71      0.63      0.67       504
     amusement       0.75      0.81      0.78       252
         anger       0.62      0.43      0.50       197
     annoyance       0.37      0.26      0.30       286
      approval       0.45      0.35      0.39       318
        caring       0.40      0.36      0.38       114
     confusion       0.42      0.45      0.44       139
     curiosity       0.46      0.45      0.46       233
        desire       0.54      0.47      0.50        74
disappointment       0.47      0.31      0.38       127
   disapproval       0.43      0.40      0.42       220
       disgust       0.51      0.49      0.50        84
 embarrassment       0.76      0.43      0.55        30
    excitement       0.44  

### Fine-tuning

In [9]:
from torch.utils.data import DataLoader, Dataset
import torch

# ‚úÖ Custom dataset class for fine-tuning
class EmotionDataset(Dataset):
    """Map-style dataset pairing one tokenised text with one integer class label.

    Args:
        texts: Iterable of input strings.
        labels: Iterable of integer class ids, aligned one-to-one with ``texts``.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding="max_length",
            max_length=max_len, return_tensors="pt")`` that returns a mapping
            of tensors with a leading dimension of size 1.
        max_len: Length every sample is truncated / padded to.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_len=256):
        # Materialise as lists so __getitem__ always indexes by position, even
        # when a pandas Series with a non-default index is passed in.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(self.texts)} texts and {len(self.labels)} labels"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenise sample ``idx`` and return its tensors plus a ``labels`` tensor."""
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        # Drop the leading size-1 dimension so the DataLoader can stack samples.
        item = {key: val.squeeze(0) for key, val in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

In [14]:
# ‚úÖ Create datasets from our DataFrames ‚Äî ensure single-label per sample
def extract_first_label(x):
    """Reduce a raw ``labels`` cell to a single integer class id.

    Args:
        x: A scalar label id (Python or NumPy number), an iterable of label
            ids (list, tuple, NumPy array, ...), or a missing value.

    Returns:
        int: The scalar itself, or the first element of the iterable, as a
        Python ``int``. Missing input (``None``, NaN, or an empty iterable)
        falls back to ``0``.

    NOTE(review): the fallback silently assigns class id 0 to unlabeled rows;
    consider filtering such rows out before training instead.
    """
    # Scalars first. NumPy integer scalars are not `int` instances, so they
    # must be matched explicitly or they would end up in the fallback.
    if isinstance(x, (int, float, np.integer, np.floating)):
        return 0 if x != x else int(x)  # x != x is true only for NaN
    if isinstance(x, np.ndarray):
        x = np.atleast_1d(x)  # lets a 0-d array be iterated like a 1-element one
    if hasattr(x, "__iter__"):
        for first in x:
            return int(first)
        return 0  # empty iterable: no label present
    return 0  # fallback if label missing

# One integer class id per row, for each frame.
train_labels = list(map(extract_first_label, train_df["labels"]))
val_labels = list(map(extract_first_label, val_df["labels"]))

# Wrap texts and labels in tokenising datasets (default max_len).
train_dataset = EmotionDataset(
    texts=train_df["text"].astype(str).tolist(),
    labels=train_labels,
    tokenizer=tokenizer,
)
val_dataset = EmotionDataset(
    texts=val_df["text"].astype(str).tolist(),
    labels=val_labels,
    tokenizer=tokenizer,
)

# Training batches are reshuffled each epoch; validation keeps its order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"‚úÖ Train samples: {len(train_dataset)} | Validation samples: {len(val_dataset)}")
print("Example labels:", train_labels[:10])

‚úÖ Train samples: 48836 | Validation samples: 5427
Example labels: [27, 27, 2, 14, 3, 26, 15, 8, 0, 27]


In [15]:
import torch
from torch.optim import AdamW  # ‚úÖ Correct import for Transformers ‚â• 5.0
from transformers import get_linear_schedule_with_warmup

# ‚úÖ Optimiser & scheduler
# Training length: one scheduler step per batch, for every epoch.
num_epochs = 4
total_steps = num_epochs * len(train_loader)
warmup_steps = int(0.1 * total_steps)  # first 10% of the steps are warm-up

# AdamW over all model parameters, with a linear warm-up schedule attached.
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

print(f"‚úÖ Training setup ready | Epochs: {num_epochs} | Total steps: {total_steps}")

‚úÖ Training setup ready | Epochs: 4 | Total steps: 12212


In [17]:
# ‚úÖ Force model into single-label classification mode
from transformers import AutoConfig

# Flag the loaded model as a single-label classifier directly on its config.
model.config.problem_type = "single_label_classification"
config = model.config  # same object as model.config; the name stays bound for later cells

print("‚úÖ Model problem_type set to:", model.config.problem_type)

‚úÖ Model problem_type set to: single_label_classification


In [18]:
from tqdm.auto import tqdm
import torch.nn.functional as F

# Early-stopping state: lowest validation loss seen so far, and how many
# consecutive epochs have failed to improve on it.
best_val_loss = float("inf")
patience, patience_counter = 2, 0

# NOTE(review): val_loader is built from the test split (see the DataFrame
# cell), so the checkpoint selected below is tuned on test data.
for epoch in range(num_epochs):
    print(f"\nüü¢ Epoch {epoch+1}/{num_epochs}")
    model.train()
    total_loss, total_correct = 0.0, 0

    for batch in tqdm(train_loader, desc="Training"):
        optimizer.zero_grad()
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        logits = outputs.logits
        loss.backward()
        optimizer.step()
        scheduler.step()  # the schedule is defined per optimisation step, not per epoch

        # The model returns a per-batch mean loss; weight it by the batch size
        # so the short final batch is not over-represented in the epoch
        # average. This matches the per-sample accuracy computed below.
        total_loss += loss.item() * batch["labels"].size(0)

        preds = torch.argmax(logits, dim=1)
        total_correct += (preds == batch["labels"]).sum().item()

    avg_train_loss = total_loss / len(train_dataset)
    train_acc = total_correct / len(train_dataset)

    # Validation pass: no gradients, dropout disabled via eval().
    model.eval()
    val_loss, val_correct = 0.0, 0
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            # Sample-weighted for the same reason as the training loss.
            val_loss += outputs.loss.item() * batch["labels"].size(0)
            preds = torch.argmax(outputs.logits, dim=1)
            val_correct += (preds == batch["labels"]).sum().item()

    avg_val_loss = val_loss / len(val_dataset)
    val_acc = val_correct / len(val_dataset)

    print(f"Train loss: {avg_train_loss:.4f} | Val loss: {avg_val_loss:.4f} | "
          f"Train acc: {train_acc:.4f} | Val acc: {val_acc:.4f}")

    # Early stopping: checkpoint on every improvement, stop after `patience`
    # consecutive epochs without one.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        model.save_pretrained("best_roberta_finetuned")
        tokenizer.save_pretrained("best_roberta_finetuned")
        print("üíæ Saved new best model!")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("‚õî Early stopping triggered.")
            break


üü¢ Epoch 1/4


Training:   0%|          | 0/3053 [00:00<?, ?it/s]

Train loss: 1.0345 | Val loss: 1.4055 | Train acc: 0.6773 | Val acc: 0.5720
üíæ Saved new best model!

üü¢ Epoch 2/4


Training:   0%|          | 0/3053 [00:00<?, ?it/s]

Train loss: 0.8819 | Val loss: 1.4670 | Train acc: 0.7207 | Val acc: 0.5790

üü¢ Epoch 3/4


Training:   0%|          | 0/3053 [00:00<?, ?it/s]

Train loss: 0.6907 | Val loss: 1.5976 | Train acc: 0.7831 | Val acc: 0.5591
‚õî Early stopping triggered.


In [19]:
import os

# ‚úÖ Define output directory
save_dir = "./Roberta/Roberta_finetuned_model"
os.makedirs(save_dir, exist_ok=True)

# After early stopping, `model` holds the weights of the LAST epoch that ran,
# not the best one. The training loop checkpoints the lowest-validation-loss
# weights to "best_roberta_finetuned", so export those when they exist and
# fall back to the in-memory model otherwise.
best_ckpt_dir = "best_roberta_finetuned"
if os.path.isdir(best_ckpt_dir):
    model_to_save = AutoModelForSequenceClassification.from_pretrained(best_ckpt_dir)
else:
    model_to_save = model

# save_pretrained writes the model config together with the weights, so no
# separate config save is needed.
model_to_save.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print(f"‚úÖ Fine-tuned model saved successfully to ‚Üí {save_dir}")

‚úÖ Fine-tuned model saved successfully to ‚Üí ./Roberta/Roberta_finetuned_model
