based on https://huggingface.co/ShinyQ/indobert-sentiment-analysis-indonesian-university-reviews

In [1]:
import os, math, random, csv
from dataclasses import dataclass
from typing import List, Dict

import re
import string
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
from tqdm import tqdm
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
)
from sklearn.metrics import classification_report, f1_score


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [26]:


# --- Model checkpoint from Hugging Face ---
MODEL_ID = "indobenchmark/indobert-large-p2"

# --- Define 5 labels (adjust names to your dataset!)
labels = ['anger', 'happy', 'sadness', 'love', 'fear']
label2id: Dict[str, int] = {lb: i for i, lb in enumerate(labels)}
id2label: Dict[int, str] = {i: lb for lb, i in label2id.items()}

# --- Reproducibility ---
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [27]:
@dataclass
class Config:
    csv_path: str = "Twitter_Emotion_Dataset.csv"   # single file with text + label
    epochs: int = 10
    batch_size: int = 16
    lr: float = 5e-5
    max_len: int = 250
    weight_decay: float = 0.01
    warmup_ratio: float = 0.06
    grad_clip: float = 1.0
    num_workers: int = 2
    val_ratio: float = 0.2          # 80% train / 20% val
    out_dir: str = "ckpt_unireviews_5labels"

cfg = Config()

In [28]:
# Indonesian stopwords
stop_words = stopwords.words("indonesian")


In [29]:
def clean_text(text):
    '''Make text lowercase, remove text in square brackets,remove links,remove punctuation
    and remove words containing numbers.'''
    # text = str(text).lower()
    text = " ".join([y for y in text.split() if y not in stop_words])
    return text


In [38]:


class CSVDataset(Dataset):
    def __init__(self, csv_path: str, tokenizer, max_len: int):
        self.rows = []
        with open(csv_path, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            for r in reader:
                t = clean_text((r.get("tweet") or "").strip())
                y = (r.get("label") or "").strip().lower()
                if t and y in label2id:
                    self.rows.append((t, label2id[y]))
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self): return len(self.rows)

    def __getitem__(self, idx):
        text, y = self.rows[idx]
        enc = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_len,
            padding=False,
            return_tensors="pt",
        )
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(y, dtype=torch.long)
        return item

In [39]:
def collate_fn(batch: List[Dict[str, torch.Tensor]]):
    keys = ["input_ids", "attention_mask"]
    if "token_type_ids" in batch[0]:
        keys.append("token_type_ids")
    max_len = max(x["input_ids"].size(0) for x in batch)
    out = {}
    for k in keys:
        pad_id = tokenizer.pad_token_id if k == "input_ids" else 0
        out[k] = torch.stack([
            torch.nn.functional.pad(x[k], (0, max_len - x[k].size(0)), value=pad_id)
            for x in batch
        ])
    out["labels"] = torch.stack([x["labels"] for x in batch])
    return out


In [40]:
def evaluate(model, loader, device):
    model.eval()
    gold, pred = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**{k: v for k, v in batch.items() if k != "labels"}).logits
            pred.extend(logits.argmax(-1).cpu().tolist())
            gold.extend(batch["labels"].cpu().tolist())
    print(classification_report(gold, pred, target_names=labels, digits=4))


In [41]:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

    # --- Load dataset & split into train/val ---
    full_ds = CSVDataset(cfg.csv_path, tokenizer, cfg.max_len)
    n_val = int(len(full_ds) * cfg.val_ratio)
    n_train = len(full_ds) - n_val
    train_ds, val_ds = random_split(full_ds, [n_train, n_val])

In [42]:

train_loader = DataLoader(train_ds, batch_size=cfg.batch_size,
                          shuffle=True, num_workers=cfg.num_workers,
                          collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=cfg.batch_size,
                        shuffle=False, num_workers=cfg.num_workers,
                        collate_fn=collate_fn)

In [43]:
# --- Model ---
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_ID, num_labels=len(labels)
)
model.config.id2label = id2label
model.config.label2id = label2id
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-large-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1

In [44]:
# --- Optimizer & scheduler ---
no_decay = ["bias", "LayerNorm.weight"]
grouped = [
    {"params": [p for n,p in model.named_parameters() if not any(nd in n for nd in no_decay)],
      "weight_decay": cfg.weight_decay},
    {"params": [p for n,p in model.named_parameters() if any(nd in n for nd in no_decay)],
      "weight_decay": 0.0},
]
optimizer = torch.optim.AdamW(grouped, lr=cfg.lr)
total_steps = cfg.epochs * math.ceil(len(train_loader))
warmup_steps = int(cfg.warmup_ratio * total_steps)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
)

In [45]:
    # --- Training loop ---
    os.makedirs(cfg.out_dir, exist_ok=True)
    for epoch in range(1, cfg.epochs + 1):
        model.train()
        running = 0.0
        for step, batch in tqdm(enumerate(train_loader, 1)):
            batch = {k: v.to(device) for k, v in batch.items()}
            out = model(**{k: v for k, v in batch.items() if k != "labels"},
                        labels=batch["labels"])
            loss = out.loss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)
            optimizer.step(); scheduler.step(); optimizer.zero_grad()
            running += loss.item()
            if step % 50 == 0:
                print(f"epoch {epoch} step {step}: loss {running/50:.4f}")
                running = 0.0

        print(f"\n=== Validation after epoch {epoch} ===")
        evaluate(model, val_loader, device)

        save_path = os.path.join(cfg.out_dir, f"epoch{epoch}")
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)

    print("\nTraining done.")

50it [00:32,  1.63it/s]

epoch 1 step 50: loss 1.5654


100it [01:04,  1.60it/s]

epoch 1 step 100: loss 1.0323


150it [01:36,  1.60it/s]

epoch 1 step 150: loss 0.7529


200it [02:09,  1.58it/s]

epoch 1 step 200: loss 0.8689


221it [02:22,  1.55it/s]


=== Validation after epoch 1 ===





              precision    recall  f1-score   support

       anger     0.7898    0.6715    0.7258       207
       happy     0.7262    0.8318    0.7754       220
     sadness     0.6300    0.6528    0.6412       193
        love     0.8761    0.7795    0.8250       127
        fear     0.7698    0.8045    0.7868       133

    accuracy                         0.7432       880
   macro avg     0.7584    0.7480    0.7509       880
weighted avg     0.7483    0.7432    0.7432       880



50it [00:32,  1.51it/s]

epoch 2 step 50: loss 0.5136


100it [01:05,  1.47it/s]

epoch 2 step 100: loss 0.4921


150it [01:36,  1.51it/s]

epoch 2 step 150: loss 0.4781


200it [02:09,  1.51it/s]

epoch 2 step 200: loss 0.5413


221it [02:22,  1.55it/s]


=== Validation after epoch 2 ===





              precision    recall  f1-score   support

       anger     0.7585    0.7585    0.7585       207
       happy     0.8726    0.6227    0.7268       220
     sadness     0.5776    0.8290    0.6809       193
        love     0.8125    0.8189    0.8157       127
        fear     0.9099    0.7594    0.8279       133

    accuracy                         0.7489       880
   macro avg     0.7862    0.7577    0.7619       880
weighted avg     0.7780    0.7489    0.7523       880



50it [00:32,  1.45it/s]

epoch 3 step 50: loss 0.1854


100it [01:04,  1.61it/s]

epoch 3 step 100: loss 0.1842


150it [01:37,  1.57it/s]

epoch 3 step 150: loss 0.2105


200it [02:09,  1.58it/s]

epoch 3 step 200: loss 0.1796


221it [02:22,  1.55it/s]


=== Validation after epoch 3 ===





              precision    recall  f1-score   support

       anger     0.7838    0.7005    0.7398       207
       happy     0.8116    0.7636    0.7869       220
     sadness     0.5920    0.7668    0.6682       193
        love     0.8468    0.7402    0.7899       127
        fear     0.8425    0.8045    0.8231       133

    accuracy                         0.7523       880
   macro avg     0.7753    0.7551    0.7616       880
weighted avg     0.7667    0.7523    0.7557       880



50it [00:33,  1.50it/s]

epoch 4 step 50: loss 0.0514


100it [01:04,  1.54it/s]

epoch 4 step 100: loss 0.0576


150it [01:36,  1.57it/s]

epoch 4 step 150: loss 0.0667


200it [02:09,  1.46it/s]

epoch 4 step 200: loss 0.1173


221it [02:22,  1.55it/s]



=== Validation after epoch 4 ===
              precision    recall  f1-score   support

       anger     0.7184    0.8502    0.7788       207
       happy     0.7725    0.8182    0.7947       220
     sadness     0.6957    0.5803    0.6328       193
        love     0.8291    0.7638    0.7951       127
        fear     0.8226    0.7669    0.7938       133

    accuracy                         0.7580       880
   macro avg     0.7676    0.7559    0.7590       880
weighted avg     0.7587    0.7580    0.7554       880



50it [00:32,  1.48it/s]

epoch 5 step 50: loss 0.0145


100it [01:05,  1.59it/s]

epoch 5 step 100: loss 0.0343


150it [01:37,  1.68it/s]

epoch 5 step 150: loss 0.0381


200it [02:09,  1.66it/s]

epoch 5 step 200: loss 0.0289


221it [02:23,  1.54it/s]


=== Validation after epoch 5 ===





              precision    recall  f1-score   support

       anger     0.7778    0.7778    0.7778       207
       happy     0.7283    0.8409    0.7806       220
     sadness     0.6936    0.6218    0.6557       193
        love     0.8571    0.8031    0.8293       127
        fear     0.8110    0.7744    0.7923       133

    accuracy                         0.7625       880
   macro avg     0.7736    0.7636    0.7671       880
weighted avg     0.7634    0.7625    0.7613       880



50it [00:33,  1.50it/s]

epoch 6 step 50: loss 0.0122


100it [01:05,  1.58it/s]

epoch 6 step 100: loss 0.0305


150it [01:37,  1.48it/s]

epoch 6 step 150: loss 0.0056


200it [02:09,  1.63it/s]

epoch 6 step 200: loss 0.0297


221it [02:23,  1.54it/s]


=== Validation after epoch 6 ===





              precision    recall  f1-score   support

       anger     0.8168    0.7536    0.7839       207
       happy     0.7883    0.7955    0.7919       220
     sadness     0.6377    0.6839    0.6600       193
        love     0.7846    0.8031    0.7938       127
        fear     0.8231    0.8045    0.8137       133

    accuracy                         0.7636       880
   macro avg     0.7701    0.7681    0.7686       880
weighted avg     0.7667    0.7636    0.7646       880



50it [00:31,  1.58it/s]

epoch 7 step 50: loss 0.0208


100it [01:02,  1.54it/s]

epoch 7 step 100: loss 0.0078


150it [01:34,  1.60it/s]

epoch 7 step 150: loss 0.0114


200it [02:07,  1.45it/s]

epoch 7 step 200: loss 0.0173


221it [02:20,  1.57it/s]


=== Validation after epoch 7 ===





              precision    recall  f1-score   support

       anger     0.7981    0.8213    0.8095       207
       happy     0.7895    0.8182    0.8036       220
     sadness     0.6947    0.6839    0.6893       193
        love     0.8125    0.8189    0.8157       127
        fear     0.8512    0.7744    0.8110       133

    accuracy                         0.7830       880
   macro avg     0.7892    0.7833    0.7858       880
weighted avg     0.7834    0.7830    0.7828       880



50it [00:31,  1.44it/s]

epoch 8 step 50: loss 0.0004


100it [01:02,  1.56it/s]

epoch 8 step 100: loss 0.0140


150it [01:35,  1.36it/s]

epoch 8 step 150: loss 0.0012


200it [02:07,  1.58it/s]

epoch 8 step 200: loss 0.0122


221it [02:20,  1.58it/s]


=== Validation after epoch 8 ===





              precision    recall  f1-score   support

       anger     0.7808    0.8261    0.8028       207
       happy     0.7845    0.8273    0.8053       220
     sadness     0.7074    0.6891    0.6982       193
        love     0.8203    0.8268    0.8235       127
        fear     0.8850    0.7519    0.8130       133

    accuracy                         0.7852       880
   macro avg     0.7956    0.7842    0.7886       880
weighted avg     0.7871    0.7852    0.7850       880



50it [00:31,  1.60it/s]

epoch 9 step 50: loss 0.0037


100it [01:02,  1.66it/s]

epoch 9 step 100: loss 0.0003


150it [01:35,  1.63it/s]

epoch 9 step 150: loss 0.0079


200it [02:07,  1.49it/s]

epoch 9 step 200: loss 0.0065


221it [02:20,  1.58it/s]


=== Validation after epoch 9 ===





              precision    recall  f1-score   support

       anger     0.7919    0.7536    0.7723       207
       happy     0.7867    0.8045    0.7955       220
     sadness     0.6538    0.7047    0.6783       193
        love     0.8125    0.8189    0.8157       127
        fear     0.8443    0.7744    0.8078       133

    accuracy                         0.7682       880
   macro avg     0.7778    0.7712    0.7739       880
weighted avg     0.7712    0.7682    0.7691       880



50it [00:32,  1.50it/s]

epoch 10 step 50: loss 0.0002


100it [01:02,  1.56it/s]

epoch 10 step 100: loss 0.0056


150it [01:34,  1.67it/s]

epoch 10 step 150: loss 0.0021


200it [02:06,  1.69it/s]

epoch 10 step 200: loss 0.0071


221it [02:19,  1.58it/s]


=== Validation after epoch 10 ===





              precision    recall  f1-score   support

       anger     0.7902    0.7826    0.7864       207
       happy     0.7876    0.8091    0.7982       220
     sadness     0.6716    0.6995    0.6853       193
        love     0.8154    0.8346    0.8249       127
        fear     0.8644    0.7669    0.8127       133

    accuracy                         0.7761       880
   macro avg     0.7859    0.7785    0.7815       880
weighted avg     0.7784    0.7761    0.7767       880


Training done.
