In [7]:
from sklearn.preprocessing import LabelEncoder
!pip install -r ../resources/requirements.txt





[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import torch
import torch.nn as nn
import pandas as pd
from transformers import AutoTokenizer, AutoModel

In [9]:

# Step 1: Load tokenizer & model
MODEL_NAME = "indobenchmark/indobert-base-p1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
hf_model = AutoModel.from_pretrained(MODEL_NAME)

In [10]:

# Freeze Hugging Face model parameters
for param in hf_model.parameters():
    param.requires_grad = False

# Step 2: Define classifier
class TweetClassifier(nn.Module):
    def __init__(self, hf_model, hidden_size, num_classes):
        super().__init__()
        self.hf_model = hf_model
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.hf_model(input_ids=input_ids, attention_mask=attention_mask)
        cls_embedding = outputs.last_hidden_state[:, 0, :]  # [CLS] token
        return self.fc(cls_embedding)

In [11]:
data = pd.read_csv('../resources/data/Twitter_Emotion_Dataset.csv')
data

Unnamed: 0,label,tweet
0,anger,"Soal jln Jatibaru,polisi tdk bs GERTAK gubernu..."
1,anger,"Sesama cewe lho (kayaknya), harusnya bisa lebi..."
2,happy,Kepingin gudeg mbarek Bu hj. Amad Foto dari go...
3,anger,"Jln Jatibaru,bagian dari wilayah Tn Abang.Peng..."
4,happy,"Sharing pengalaman aja, kemarin jam 18.00 bata..."
...,...,...
4396,love,"Tahukah kamu, bahwa saat itu papa memejamkan m..."
4397,fear,Sulitnya menetapkan Calon Wapresnya Jokowi di ...
4398,anger,"5. masa depannya nggak jelas. lha iya, gimana ..."
4399,happy,[USERNAME] dulu beneran ada mahasiswa Teknik U...


In [None]:

# Step 3: Create model instance
labels = data["label"]
num_classes = len(set(labels))  # example: positive, neutral, negative
model = TweetClassifier(hf_model, hidden_size=768, num_classes=num_classes)

# Step 4: Example data
tweets = ["Aku sangat senang hari ini", "Cuacanya buruk sekali"]
encodings = tokenizer(data["tweet"].tolist(), truncation=True, padding=True, return_tensors="pt")

# Forward pass
logits = model(encodings["input_ids"], encodings["attention_mask"])
print(logits)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
y = torch.tensor(LabelEncoder().fit_transform(labels)) # labels
for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(encodings["input_ids"], encodings["attention_mask"])
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}')


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
