# Finetuning Emot
Emot is an emotion recognition dataset with 5 possible labels: `sadness`, `anger`, `love`, `fear`, `happy`.

In [None]:
import os, sys

import random
import numpy as np
import pandas as pd
import torch
from torch import optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from tqdm import tqdm

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from nltk.tokenize import TweetTokenizer

In [None]:
# Forward function for sequence classification
def forward_sequence_classification(
    model, batch_data, i2w, is_test=False, device="cpu", **kwargs
):
    """Run one batch through a sequence-classification model and decode it.

    Args:
        model: Callable invoked as ``model(subwords, attention_mask=...,
            token_type_ids=..., labels=...)``. The first two items of its
            output are read as ``(loss, logits)``.
        batch_data: Either ``(subwords, mask, labels)`` or
            ``(subwords, mask, token_type_ids, labels)``. Every item must be
            convertible to a tensor (nested lists or NumPy arrays).
        i2w: Mapping from class index to label name.
        is_test: Accepted for interface compatibility; not used here.
        device: Tensors are moved only when this names a CUDA device
            (``"cuda"``, ``"cuda:0"``, ``torch.device("cuda")``, ...). Any
            other value leaves them on the CPU, where they are created.
        **kwargs: Ignored.

    Returns:
        Tuple ``(loss, list_hyp, list_label)``: the loss returned by the
        model, the predicted label names and the gold label names, both in
        batch order.

    Raises:
        ValueError: If ``batch_data`` does not hold exactly 3 or 4 items.
    """
    # Unpack batch data; fail loudly instead of hitting an unbound name later
    if len(batch_data) == 3:
        subword_batch, mask_batch, label_batch = batch_data
        token_type_batch = None
    elif len(batch_data) == 4:
        subword_batch, mask_batch, token_type_batch, label_batch = batch_data
    else:
        raise ValueError(
            "batch_data must contain 3 or 4 items, got {}".format(len(batch_data))
        )

    # Prepare input & label
    subword_batch = torch.LongTensor(subword_batch)
    mask_batch = torch.FloatTensor(mask_batch)
    if token_type_batch is not None:
        token_type_batch = torch.LongTensor(token_type_batch)
    label_batch = torch.LongTensor(label_batch)

    # Accept "cuda", "cuda:N" and torch.device objects alike
    if str(device).startswith("cuda"):
        subword_batch = subword_batch.to(device)
        mask_batch = mask_batch.to(device)
        if token_type_batch is not None:
            token_type_batch = token_type_batch.to(device)
        label_batch = label_batch.to(device)

    # Forward model
    outputs = model(
        subword_batch,
        attention_mask=mask_batch,
        token_type_ids=token_type_batch,
        labels=label_batch,
    )
    loss, logits = outputs[:2]

    # Highest-scoring class per row
    hyp_indices = torch.topk(logits, 1)[1][:, 0].tolist()
    # Labels may arrive as shape (batch,) or (batch, 1); read the first column
    gold = label_batch if label_batch.dim() == 1 else label_batch[:, 0]
    gold_indices = gold.tolist()

    # generate prediction & label list
    list_hyp = [i2w[index] for index in hyp_indices]
    list_label = [i2w[index] for index in gold_indices]

    return loss, list_hyp, list_label

In [None]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

def document_sentiment_metrics_fn(list_hyp, list_label):
    """Score predictions against gold labels.

    Args:
        list_hyp: Predicted labels.
        list_label: Gold labels, aligned with ``list_hyp``.

    Returns:
        Dict with keys ``ACC`` (accuracy) and ``F1``, ``REC``, ``PRE``
        (macro-averaged F1, recall and precision), in that order.
    """
    macro_scorers = (
        ("F1", f1_score),
        ("REC", recall_score),
        ("PRE", precision_score),
    )

    metrics = {"ACC": accuracy_score(list_label, list_hyp)}
    for key, scorer in macro_scorers:
        metrics[key] = scorer(list_label, list_hyp, average="macro")
    return metrics

In [None]:
#####
# Emotion Twitter
#####
class EmotionDetectionDataset(Dataset):
    """Emotion-labelled tweets read from a CSV file.

    The CSV is read with ``pd.read_csv`` and needs a ``tweet`` column and a
    ``label`` column whose values are keys of ``LABEL2INDEX``.
    """

    # Label vocabulary shared by every instance
    LABEL2INDEX = {"sadness": 0, "anger": 1, "love": 2, "fear": 3, "happy": 4}
    INDEX2LABEL = {0: "sadness", 1: "anger", 2: "love", 3: "fear", 4: "happy"}
    NUM_LABELS = 5

    def __init__(
        self, dataset_path, tokenizer, no_special_token=False, *args, **kwargs
    ):
        """Load the CSV and remember how to tokenize it.

        Args:
            dataset_path: Path or URL handed to ``pd.read_csv``.
            tokenizer: Object exposing ``encode(text, add_special_tokens=...)``.
            no_special_token: When True, ``encode`` is called with
                ``add_special_tokens=False``.
            *args, **kwargs: Accepted but not used.
        """
        self.tokenizer = tokenizer
        self.no_special_token = no_special_token
        self.data = self.load_dataset(dataset_path)

    def load_dataset(self, path):
        """Read the CSV at ``path`` and replace label names by class indices."""

        def to_index(label_name):
            # Unknown label names raise KeyError
            return self.LABEL2INDEX[label_name]

        frame = pd.read_csv(path)
        frame["label"] = frame["label"].map(to_index)
        return frame

    def __len__(self):
        """Number of rows in the loaded table."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(subword ids, label, raw tweet)`` for the row labelled ``index``."""
        tweet = self.data.loc[index, "tweet"]
        label = self.data.loc[index, "label"]

        with_special_tokens = not self.no_special_token
        token_ids = self.tokenizer.encode(
            tweet, add_special_tokens=with_special_tokens
        )
        return np.array(token_ids), np.array(label), tweet


class EmotionDetectionDataLoader(DataLoader):
    """DataLoader that pads variable-length subword sequences into dense batches."""

    def __init__(self, max_seq_len=512, *args, **kwargs):
        """Build the loader.

        Args:
            max_seq_len: Upper bound on the padded sequence length; longer
                sequences are cut to this many subwords.
            *args, **kwargs: Forwarded to ``DataLoader``. The collate
                function is always replaced by ``_collate_fn`` afterwards.
        """
        super().__init__(*args, **kwargs)
        self.max_seq_len = max_seq_len
        self.collate_fn = self._collate_fn

    def _collate_fn(self, batch):
        """Pad a list of ``(subwords, label, raw_text)`` samples.

        Returns:
            ``(subword_batch, mask_batch, label_batch, seq_list)``: int64 ids
            padded with 0, a float32 mask with 1 on real tokens, int64 labels
            of shape ``(batch, 1)`` and the raw texts in batch order.
        """
        n_rows = len(batch)
        # Pad to the longest sample in this batch, capped by the configured limit
        longest = max(len(sample[0]) for sample in batch)
        width = min(self.max_seq_len, longest)

        subword_batch = np.zeros((n_rows, width), dtype=np.int64)
        mask_batch = np.zeros((n_rows, width), dtype=np.float32)
        label_batch = np.full((n_rows, 1), -100, dtype=np.int64)
        seq_list = []

        for row, (subwords, label, raw_seq) in enumerate(batch):
            clipped = subwords[:width]
            n_tokens = len(clipped)

            subword_batch[row, :n_tokens] = clipped
            mask_batch[row, :n_tokens] = 1
            label_batch[row] = label
            seq_list.append(raw_seq)

        return subword_batch, mask_batch, label_batch, seq_list

In [None]:
###
# common functions
###
def set_seed(seed):
    """Seed the RNGs of ``random``, NumPy and PyTorch with the same value.

    Calls, in order: ``random.seed``, ``np.random.seed``,
    ``torch.manual_seed`` and ``torch.cuda.manual_seed``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)

def count_param(module, trainable=False):
    """Count the scalar parameters of ``module``.

    Args:
        module: Object exposing ``parameters()``.
        trainable: When True, only parameters with ``requires_grad`` set are
            counted; otherwise every parameter is counted.

    Returns:
        Total number of elements over the selected parameters.
    """
    total = 0
    for param in module.parameters():
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

def get_lr(optimizer):
    """Return the ``'lr'`` of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def metrics_to_string(metric_dict):
    """Render metrics as space-separated ``NAME:value`` pairs, two decimals each."""
    return ' '.join(
        '{}:{:.2f}'.format(name, score) for name, score in metric_dict.items()
    )

In [None]:
# Fix every RNG up front so that repeated runs start from the same state
SEED = 26092020
set_seed(SEED)

# Load Model

In [None]:
# One checkpoint name drives the tokenizer, the config and the weights
PRETRAINED_NAME = 'indobenchmark/indobert-base-p2'

# Tokenizer and config; the config is resized to the Emot label set
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_NAME)
config = BertConfig.from_pretrained(PRETRAINED_NAME)
config.num_labels = EmotionDetectionDataset.NUM_LABELS

# Model built from the checkpoint with the adjusted config
model = BertForSequenceClassification.from_pretrained(PRETRAINED_NAME, config=config)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Total number of parameters in the model (trainable or not)
count_param(model)

124445189

# Prepare Dataset

In [None]:
# The three splits sit side by side in the same remote folder
EMOT_DATA_URL = "https://raw.githubusercontent.com/IndoNLP/indonlu/master/dataset/emot_emotion-twitter"

train_dataset_path = f"{EMOT_DATA_URL}/train_preprocess.csv"
valid_dataset_path = f"{EMOT_DATA_URL}/valid_preprocess.csv"
test_dataset_path = f"{EMOT_DATA_URL}/test_preprocess_masked_label.csv"

In [None]:
# Batching settings shared by the three loaders
MAX_SEQ_LEN = 512
BATCH_SIZE = 32
# Never request more worker processes than the machine has CPUs
# (os.cpu_count() may return None, hence the fallback to 1).
NUM_WORKERS = min(16, os.cpu_count() or 1)

# NOTE: `lowercase=True` is swallowed by the dataset's **kwargs and never read,
# so tweets are passed to the tokenizer exactly as stored in the CSV.
train_dataset = EmotionDetectionDataset(train_dataset_path, tokenizer, lowercase=True)
valid_dataset = EmotionDetectionDataset(valid_dataset_path, tokenizer, lowercase=True)
test_dataset = EmotionDetectionDataset(test_dataset_path, tokenizer, lowercase=True)

# Only the training split is shuffled; validation and test keep file order
train_loader = EmotionDetectionDataLoader(dataset=train_dataset, max_seq_len=MAX_SEQ_LEN, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
valid_loader = EmotionDetectionDataLoader(dataset=valid_dataset, max_seq_len=MAX_SEQ_LEN, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
test_loader = EmotionDetectionDataLoader(dataset=test_dataset, max_seq_len=MAX_SEQ_LEN, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)



In [None]:
# Lookup tables between label names and class indices, used to decode predictions
w2i = EmotionDetectionDataset.LABEL2INDEX
i2w = EmotionDetectionDataset.INDEX2LABEL
print(w2i, i2w, sep='\n')

{'sadness': 0, 'anger': 1, 'love': 2, 'fear': 3, 'happy': 4}
{0: 'sadness', 1: 'anger', 2: 'love', 3: 'fear', 4: 'happy'}


# Test model on sample sentences

In [None]:
# Classify one sentence with the model as loaded (not yet fine-tuned)
text = 'Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita | Label : sadness (25.796%)


In [None]:
# Second probe sentence, still before fine-tuning
text = 'Budi pergi ke pondok indah mall membeli cakwe'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Budi pergi ke pondok indah mall membeli cakwe | Label : sadness (27.013%)


In [None]:
# Third probe sentence, still before fine-tuning
text = 'Dasar anak sialan!! Kurang ajar!!'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Dasar anak sialan!! Kurang ajar!! | Label : sadness (27.658%)


# Fine Tuning & Evaluation

In [None]:
# Move the model to the GPU first, then build the optimizer over its parameters,
# so the optimizer is created from the parameters as they exist on the device.
model = model.cuda(device=0)

optimizer = optim.Adam(model.parameters(), lr=5e-6)

In [None]:
# Train
n_epochs = 20
for epoch in range(n_epochs):
    # ---- Training pass ----
    model.train()
    # Autograd may have been switched off globally by an earlier evaluation run
    torch.set_grad_enabled(True)

    total_train_loss = 0
    list_hyp, list_label = [], []

    train_pbar = tqdm(train_loader, leave=True, total=len(train_loader))
    for i, batch_data in enumerate(train_pbar):
        # Forward model (the last batch element is the raw text, not a model input)
        loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

        # Update model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

        # Collect predictions for the epoch-level metrics
        list_hyp += batch_hyp
        list_label += batch_label

        # Running mean of the loss over the batches seen so far
        train_pbar.set_description("(Epoch {}) TRAIN LOSS:{:.4f} LR:{:.8f}".format((epoch+1),
            total_train_loss/(i+1), get_lr(optimizer)))

    # Calculate train metric
    metrics = document_sentiment_metrics_fn(list_hyp, list_label)
    print("(Epoch {}) TRAIN LOSS:{:.4f} {} LR:{:.8f}".format((epoch+1),
        total_train_loss/(i+1), metrics_to_string(metrics), get_lr(optimizer)))

    # ---- Validation pass ----
    model.eval()

    total_loss = 0
    list_hyp, list_label = [], []

    # Gradients are only disabled for the duration of the validation loop
    with torch.no_grad():
        pbar = tqdm(valid_loader, leave=True, total=len(valid_loader))
        for i, batch_data in enumerate(pbar):
            loss, batch_hyp, batch_label = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')

            # Calculate total loss
            total_loss += loss.item()

            # Metrics over everything validated so far, shown on the progress bar
            list_hyp += batch_hyp
            list_label += batch_label
            metrics = document_sentiment_metrics_fn(list_hyp, list_label)

            pbar.set_description("VALID LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))

    metrics = document_sentiment_metrics_fn(list_hyp, list_label)
    print("(Epoch {}) VALID LOSS:{:.4f} {}".format((epoch+1),
        total_loss/(i+1), metrics_to_string(metrics)))

(Epoch 1) TRAIN LOSS:1.3361 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.22it/s]


(Epoch 1) TRAIN LOSS:1.3361 ACC:0.46 F1:0.42 REC:0.42 PRE:0.48 LR:0.00000500


VALID LOSS:1.0409 ACC:0.59 F1:0.58 REC:0.58 PRE:0.61: 100%|██████████| 14/14 [00:03<00:00,  4.42it/s]


(Epoch 1) VALID LOSS:1.0409 ACC:0.59 F1:0.58 REC:0.58 PRE:0.61


(Epoch 2) TRAIN LOSS:0.8947 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.27it/s]


(Epoch 2) TRAIN LOSS:0.8947 ACC:0.69 F1:0.68 REC:0.68 PRE:0.69 LR:0.00000500


VALID LOSS:0.8040 ACC:0.68 F1:0.68 REC:0.68 PRE:0.68: 100%|██████████| 14/14 [00:03<00:00,  3.55it/s]


(Epoch 2) VALID LOSS:0.8040 ACC:0.68 F1:0.68 REC:0.68 PRE:0.68


(Epoch 3) TRAIN LOSS:0.6346 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 3) TRAIN LOSS:0.6346 ACC:0.78 F1:0.78 REC:0.78 PRE:0.79 LR:0.00000500


VALID LOSS:0.7765 ACC:0.70 F1:0.70 REC:0.71 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  4.28it/s]


(Epoch 3) VALID LOSS:0.7765 ACC:0.70 F1:0.70 REC:0.71 PRE:0.72


(Epoch 4) TRAIN LOSS:0.4890 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.28it/s]


(Epoch 4) TRAIN LOSS:0.4890 ACC:0.84 F1:0.84 REC:0.84 PRE:0.84 LR:0.00000500


VALID LOSS:0.7656 ACC:0.71 F1:0.71 REC:0.72 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  4.35it/s]


(Epoch 4) VALID LOSS:0.7656 ACC:0.71 F1:0.71 REC:0.72 PRE:0.72


(Epoch 5) TRAIN LOSS:0.3817 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 5) TRAIN LOSS:0.3817 ACC:0.87 F1:0.88 REC:0.87 PRE:0.88 LR:0.00000500


VALID LOSS:0.8170 ACC:0.72 F1:0.73 REC:0.72 PRE:0.73: 100%|██████████| 14/14 [00:03<00:00,  4.01it/s]


(Epoch 5) VALID LOSS:0.8170 ACC:0.72 F1:0.73 REC:0.72 PRE:0.73


(Epoch 6) TRAIN LOSS:0.2637 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.23it/s]


(Epoch 6) TRAIN LOSS:0.2637 ACC:0.92 F1:0.92 REC:0.92 PRE:0.93 LR:0.00000500


VALID LOSS:0.8743 ACC:0.71 F1:0.71 REC:0.71 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  3.74it/s]


(Epoch 6) VALID LOSS:0.8743 ACC:0.71 F1:0.71 REC:0.71 PRE:0.72


(Epoch 7) TRAIN LOSS:0.1886 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.26it/s]


(Epoch 7) TRAIN LOSS:0.1886 ACC:0.95 F1:0.95 REC:0.94 PRE:0.95 LR:0.00000500


VALID LOSS:0.9420 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73: 100%|██████████| 14/14 [00:03<00:00,  4.48it/s]


(Epoch 7) VALID LOSS:0.9420 ACC:0.71 F1:0.72 REC:0.72 PRE:0.73


(Epoch 8) TRAIN LOSS:0.1246 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.31it/s]


(Epoch 8) TRAIN LOSS:0.1246 ACC:0.97 F1:0.97 REC:0.97 PRE:0.97 LR:0.00000500


VALID LOSS:1.0525 ACC:0.70 F1:0.71 REC:0.70 PRE:0.71: 100%|██████████| 14/14 [00:03<00:00,  4.36it/s]


(Epoch 8) VALID LOSS:1.0525 ACC:0.70 F1:0.71 REC:0.70 PRE:0.71


(Epoch 9) TRAIN LOSS:0.0783 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.24it/s]


(Epoch 9) TRAIN LOSS:0.0783 ACC:0.98 F1:0.98 REC:0.98 PRE:0.98 LR:0.00000500


VALID LOSS:1.1079 ACC:0.70 F1:0.71 REC:0.71 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  3.68it/s]


(Epoch 9) VALID LOSS:1.1079 ACC:0.70 F1:0.71 REC:0.71 PRE:0.72


(Epoch 10) TRAIN LOSS:0.0634 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.30it/s]


(Epoch 10) TRAIN LOSS:0.0634 ACC:0.99 F1:0.99 REC:0.99 PRE:0.99 LR:0.00000500


VALID LOSS:1.2022 ACC:0.70 F1:0.71 REC:0.70 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  4.45it/s]


(Epoch 10) VALID LOSS:1.2022 ACC:0.70 F1:0.71 REC:0.70 PRE:0.72


(Epoch 11) TRAIN LOSS:0.0663 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.31it/s]


(Epoch 11) TRAIN LOSS:0.0663 ACC:0.98 F1:0.98 REC:0.98 PRE:0.99 LR:0.00000500


VALID LOSS:1.2357 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70: 100%|██████████| 14/14 [00:03<00:00,  4.49it/s]


(Epoch 11) VALID LOSS:1.2357 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70


(Epoch 12) TRAIN LOSS:0.0361 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.24it/s]


(Epoch 12) TRAIN LOSS:0.0361 ACC:0.99 F1:0.99 REC:0.99 PRE:0.99 LR:0.00000500


VALID LOSS:1.3118 ACC:0.69 F1:0.69 REC:0.70 PRE:0.69: 100%|██████████| 14/14 [00:03<00:00,  3.78it/s]


(Epoch 12) VALID LOSS:1.3118 ACC:0.69 F1:0.69 REC:0.70 PRE:0.69


(Epoch 13) TRAIN LOSS:0.0237 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 13) TRAIN LOSS:0.0237 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.3051 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69: 100%|██████████| 14/14 [00:03<00:00,  4.51it/s]


(Epoch 13) VALID LOSS:1.3051 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69


(Epoch 14) TRAIN LOSS:0.0204 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 14) TRAIN LOSS:0.0204 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.3673 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69: 100%|██████████| 14/14 [00:03<00:00,  4.34it/s]


(Epoch 14) VALID LOSS:1.3673 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69


(Epoch 15) TRAIN LOSS:0.0224 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 15) TRAIN LOSS:0.0224 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.3921 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69: 100%|██████████| 14/14 [00:03<00:00,  3.93it/s]


(Epoch 15) VALID LOSS:1.3921 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69


(Epoch 16) TRAIN LOSS:0.0153 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.24it/s]


(Epoch 16) TRAIN LOSS:0.0153 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.4660 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69: 100%|██████████| 14/14 [00:03<00:00,  4.04it/s]


(Epoch 16) VALID LOSS:1.4660 ACC:0.68 F1:0.69 REC:0.69 PRE:0.69


(Epoch 17) TRAIN LOSS:0.0142 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.30it/s]


(Epoch 17) TRAIN LOSS:0.0142 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.4602 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70: 100%|██████████| 14/14 [00:03<00:00,  4.33it/s]


(Epoch 17) VALID LOSS:1.4602 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70


(Epoch 18) TRAIN LOSS:0.0099 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 18) TRAIN LOSS:0.0099 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.5294 ACC:0.68 F1:0.69 REC:0.69 PRE:0.71: 100%|██████████| 14/14 [00:03<00:00,  4.14it/s]


(Epoch 18) VALID LOSS:1.5294 ACC:0.68 F1:0.69 REC:0.69 PRE:0.71


(Epoch 19) TRAIN LOSS:0.0136 LR:0.00000500: 100%|██████████| 111/111 [00:49<00:00,  2.22it/s]


(Epoch 19) TRAIN LOSS:0.0136 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.5607 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70: 100%|██████████| 14/14 [00:03<00:00,  3.86it/s]


(Epoch 19) VALID LOSS:1.5607 ACC:0.69 F1:0.70 REC:0.70 PRE:0.70


(Epoch 20) TRAIN LOSS:0.0110 LR:0.00000500: 100%|██████████| 111/111 [00:48<00:00,  2.29it/s]


(Epoch 20) TRAIN LOSS:0.0110 ACC:1.00 F1:1.00 REC:1.00 PRE:1.00 LR:0.00000500


VALID LOSS:1.5008 ACC:0.70 F1:0.71 REC:0.71 PRE:0.72: 100%|██████████| 14/14 [00:03<00:00,  4.36it/s]

(Epoch 20) VALID LOSS:1.5008 ACC:0.70 F1:0.71 REC:0.71 PRE:0.72





In [None]:
# Evaluate on test
model.eval()
# Left disabled globally on purpose: the inference cells below run without gradients
torch.set_grad_enabled(False)

list_hyp = []

pbar = tqdm(test_loader, leave=True, total=len(test_loader))
for batch_data in pbar:
    # Only the predictions are kept; the returned loss and label list are discarded
    _, batch_hyp, _ = forward_sequence_classification(model, batch_data[:-1], i2w=i2w, device='cuda')
    list_hyp += batch_hyp

# Save prediction (the row position becomes the `index` column)
df = pd.DataFrame({'label':list_hyp}).reset_index()
df.to_csv('pred.txt', index=False)

print(df)

100%|██████████| 14/14 [00:03<00:00,  4.16it/s]

     index    label
0        0     love
1        1     fear
2        2     fear
3        3    happy
4        4    happy
..     ...      ...
435    435  sadness
436    436  sadness
437    437     fear
438    438  sadness
439    439    happy

[440 rows x 2 columns]





# Test fine-tuned model on sample sentences

In [None]:
# Restore the fine-tuned weights from disk. On a fresh run the checkpoint does
# not exist yet (it is only written further down the notebook), so write it
# from the in-memory model first instead of failing with FileNotFoundError.
checkpoint_path = "emot-fine-tuned.pth"
if not os.path.exists(checkpoint_path):
    torch.save(model.state_dict(), checkpoint_path)

# map_location loads the tensors onto the device the model currently lives on
state_dict = torch.load(checkpoint_path, map_location=model.device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Classify the first probe sentence with the weights currently in the model
text = 'Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita | Label : happy (99.458%)


In [None]:
# Classify the second probe sentence with the weights currently in the model
text = 'Budi pergi ke pondok indah mall membeli cakwe'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Budi pergi ke pondok indah mall membeli cakwe | Label : sadness (99.281%)


In [None]:
# Classify the third probe sentence with the weights currently in the model
text = 'Dasar anak sialan!! Kurang ajar!!'
token_ids = tokenizer.encode(text)
# Shape the ids as a batch of one and place them on the model's device
subwords = torch.LongTensor(token_ids).view(1, -1).to(model.device)

logits = model(subwords)[0]
top_index = torch.topk(logits, k=1, dim=-1)[1]
label = top_index.squeeze().item()
confidence = F.softmax(logits, dim=-1).squeeze()[label] * 100

print(f'Text: {text} | Label : {i2w[label]} ({confidence:.3f}%)')

Text: Dasar anak sialan!! Kurang ajar!! | Label : anger (99.868%)


In [None]:
# Write the model's state dict (weights only) to the working directory
torch.save(model.state_dict(), "emot-fine-tuned.pth")

In [None]:
# Colab-only: mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import shutil

In [None]:
# Copy the checkpoint this notebook writes ("emot-fine-tuned.pth") to Drive.
drive_model_dir = "/content/drive/MyDrive/bert_model"
# Create only the leaf folder: os.mkdir still fails if Drive itself is not
# mounted, while a missing folder no longer turns the copy into a file
# named "bert_model".
if not os.path.isdir(drive_model_dir):
    os.mkdir(drive_model_dir)
shutil.copy("emot-fine-tuned.pth", drive_model_dir)

'/content/drive/MyDrive/bert_model/tweet.pth'