BERT sentiment classification with Hugging Face Transformers. Featuring:

- cross entropy loss (pretty standard)
- a linear learning-rate scheduler with warmup
- using only the [CLS] token representation for classification

In [3]:
import os
os.chdir('../experiment_tracking')

import warnings

warnings.filterwarnings('ignore')

from data_preprocessing import train_data_loader, test_data_loader, dev_data_loader,\
                                MODEL_NAME, class_names, train_size, dev_size

import torch
from torch import nn, optim
from transformers import BertModel, AdamW, get_linear_schedule_with_warmup
import numpy as np
from model import SentimentClassifier
from collections import defaultdict

2024-02-26 13:32:36.229903: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-26 13:32:36.387440: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-26 13:32:37.339349: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/cuda/lib64:/opt/cuda/lib:/opt/cuda/lib64:/opt/cuda/lib
2024-02-26 13:32:37.339468: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dl

In [4]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Number of full passes over the training data; also used to size the
# learning-rate schedule (total steps = batches per epoch * EPOCHS).
EPOCHS = 5


def optimizer_scheduler(model, lr=1e-5, num_warmup_steps=5, num_training_steps=None):
    """Build the AdamW optimizer and the linear warmup/decay LR scheduler.

    Args:
        model: Module whose ``parameters()`` will be optimized.
        lr: Learning rate handed to AdamW (default ``1e-5``).
        num_warmup_steps: Number of scheduler steps spent in warmup (default ``5``).
        num_training_steps: Total number of scheduler steps. When ``None`` it is
            derived from the notebook globals as
            ``len(train_data_loader) * EPOCHS``.

    Returns:
        tuple: ``(optimizer, scheduler)``.
    """
    # correct_bias=False turns off Adam's bias correction in the transformers
    # implementation of AdamW.
    # NOTE(review): transformers.AdamW is deprecated upstream in favour of
    # torch.optim.AdamW, which has no ``correct_bias`` switch and a different
    # default weight decay -- migrating would change training behaviour, so it
    # is intentionally left as-is here.
    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)

    if num_training_steps is None:
        total_steps = len(train_data_loader) * EPOCHS
    else:
        total_steps = num_training_steps

    print("Number of training steps = ", total_steps)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=total_steps
    )

    return optimizer, scheduler
    
# Cross-entropy objective, moved onto the selected device
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)


In [6]:
# Function for a single training epoch
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    """Train ``model`` for one pass over ``data_loader``.

    Every batch must be a mapping providing ``"input_ids"``, ``"attention_mask"``
    and ``"labels"`` tensors; the model is called with the first two as keyword
    arguments and its output is scored against the labels with ``loss_fn``.
    Gradients are clipped to a max norm of 1.0 before each optimizer step, and
    the scheduler is stepped once per batch.

    Args:
        model: Module to train (switched to training mode).
        data_loader: Iterable of batches as described above.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batch tensors are moved to.
        scheduler: Learning-rate scheduler stepped after every optimizer step.
        n_examples: Denominator used for the accuracy.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where ``accuracy`` is a 0-d float64
        tensor (correct predictions / ``n_examples``) and ``mean_loss`` is the
        mean of the per-batch losses (NaN when ``data_loader`` yields nothing).
    """
    model = model.train()
    losses = []
    # Tensor accumulator (not a plain int) so ``.double()`` below also works
    # when the loader yields no batches.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)

    # Discard gradients left over from earlier work so they cannot leak into
    # the first update of this epoch.
    optimizer.zero_grad()

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        labels = d["labels"].to(device)

        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # Predicted class = index of the largest output along dim 1
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, labels)
        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        # Backward prop
        loss.backward()

        # Gradient descent step with gradient-norm clipping
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    mean_loss = np.mean(losses) if losses else np.float64("nan")
    return correct_predictions.double() / n_examples, mean_loss


def eval_model(model, data_loader, loss_fn, device, n_examples):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Every batch must be a mapping providing ``"input_ids"``, ``"attention_mask"``
    and ``"labels"`` tensors; the model is called with the first two as keyword
    arguments and its output is scored against the labels with ``loss_fn``.

    Args:
        model: Module to evaluate (switched to evaluation mode).
        data_loader: Iterable of batches as described above.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        device: Device the batch tensors are moved to.
        n_examples: Denominator used for the accuracy.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where ``accuracy`` is a 0-d float64
        tensor (correct predictions / ``n_examples``) and ``mean_loss`` is the
        mean of the per-batch losses (NaN when ``data_loader`` yields nothing).
    """
    model = model.eval()

    losses = []
    # Tensor accumulator (not a plain int) so ``.double()`` below also works
    # when the loader yields no batches.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)

    with torch.no_grad():
        for d in data_loader:
            input_ids = d["input_ids"].to(device)
            attention_mask = d["attention_mask"].to(device)
            labels = d["labels"].to(device)

            # Get model outputs
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )

            # Predicted class = index of the largest output along dim 1
            _, preds = torch.max(outputs, dim=1)
            loss = loss_fn(outputs, labels)

            correct_predictions += torch.sum(preds == labels)
            losses.append(loss.item())

    mean_loss = np.mean(losses) if losses else np.float64("nan")
    return correct_predictions.double() / n_examples, mean_loss


In [7]:
# One output class per entry in class_names; the classifier lives on `device`.
n_classes = len(class_names)
model = SentimentClassifier(n_classes).to(device)

# Optimizer and LR schedule are bound to this model's parameters.
optimizer, scheduler = optimizer_scheduler(model)

Number of training steps =  110


In [8]:
%%time

# Per-epoch metrics, keyed by metric name
history = defaultdict(list)

for epoch in range(EPOCHS):

    # Epoch banner
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print("-" * 10)

    # One optimisation pass over the training split
    train_acc, train_loss = train_epoch(
        model, train_data_loader, loss_fn, optimizer, device, scheduler, train_size
    )
    print(f"Train loss {train_loss} accuracy {train_acc}")

    # Accuracy and loss on the dev split
    val_acc, val_loss = eval_model(
        model, dev_data_loader, loss_fn, device, dev_size
    )
    print(f"Val   loss {val_loss} accuracy {val_acc}")
    print()

    # Record this epoch's metrics (same key order as they are produced)
    for _metric_name, _metric_value in (
        ("train_acc", train_acc),
        ("train_loss", train_loss),
        ("val_acc", val_acc),
        ("val_loss", val_loss),
    ):
        history[_metric_name].append(_metric_value)
    

Epoch 1/5
----------
Train loss 0.6029298278418455 accuracy 0.6714285714285714
Val   loss 0.25251230384622303 accuracy 0.905

Epoch 2/5
----------
Train loss 0.1871497207744555 accuracy 0.9371428571428572
Val   loss 0.18443297701222555 accuracy 0.925

Epoch 3/5
----------
Train loss 0.05785479307682677 accuracy 0.9828571428571429
Val   loss 0.24171212062771832 accuracy 0.92

Epoch 4/5
----------
Train loss 0.05630385230126029 accuracy 0.9871428571428571
Val   loss 0.2721977095186178 accuracy 0.92

Epoch 5/5
----------
Train loss 0.020318325704217634 accuracy 0.9957142857142857
Val   loss 0.2258288101958377 accuracy 0.9400000000000001

CPU times: user 50.7 s, sys: 14.3 s, total: 1min 4s
Wall time: 1min 6s
