In [1]:
import warnings
# Suppress every warning for the rest of the kernel session to keep cell
# output readable.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries imported below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [4]:
from transformers import AdamW, get_linear_schedule_with_warmup
from model import SentimentClassifierUntrainedCLSMultiLastLayersMultiLinear
from data_preprocessing import create_data_loader,\
                                tokenizer,\
                                df_train,\
                                df_dev,\
                                df_test,\
                                dev_data_loader,\
                                test_data_loader,\
                                dev_size,\
                                MAX_LEN,\
                                BATCH_SIZE,\
                                class_names

import pandas as pd
from train import DEVICE, optimizer_scheduler, train_model, train_epoch, eval_model, loss_fn
import mlflow

# Record run metadata in a SQLite database file named mlflow.db; the URI uses
# a relative path, so the file location depends on the working directory.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# Select the experiment that runs started later in this notebook are filed
# under (the call returns the Experiment object shown as the cell output).
mlflow.set_experiment("bert-restaurant-sentiment-classification")

<Experiment: artifact_location='/home/tung/units/python_dev/tung_aimesoft_solution/repo/codes/experiment_tracking/mlruns/1', creation_time=1708886061134, experiment_id='1', last_update_time=1708886061134, lifecycle_stage='active', name='bert-restaurant-sentiment-classification', tags={}>

In [5]:
# Fold the dev split into the training split so the final model is fitted on
# both. The concatenated frame keeps the original index labels of each part.
# NOTE(review): if create_data_loader looks rows up by index label, duplicate
# labels from the two frames could matter -- confirm, or pass ignore_index=True.
df_train_total = pd.concat([df_train, df_dev])

# Number of examples in the merged frame; passed to train_epoch later on.
train_size = len(df_train_total)

# Batched loader over the merged frame, built with the shared tokenizer and
# the MAX_LEN / BATCH_SIZE settings imported from data_preprocessing.
total_train_data_loader = create_data_loader(df_train_total, tokenizer, MAX_LEN, BATCH_SIZE)

In [6]:
def optimizer_scheduler(model, data_loader=None, epochs=None, lr=1e-5, num_warmup_steps=5):
    """Create the AdamW optimizer and linear-warmup LR scheduler for ``model``.

    This definition replaces (shadows) the ``optimizer_scheduler`` imported
    from ``train`` at the top of the notebook, so that the schedule length is
    derived from the merged train+dev loader instead.

    Args:
        model: Module whose ``parameters()`` are optimized.
        data_loader: Sized loader used to count optimizer steps per epoch.
            Defaults to the notebook-level ``total_train_data_loader``.
        epochs: Number of training epochs. Defaults to the notebook-level
            ``EPOCHS``, which must be defined before this function is called.
        lr: Peak learning rate handed to AdamW.
        num_warmup_steps: Number of scheduler steps spent warming up.

    Returns:
        Tuple ``(optimizer, scheduler)``.
    """
    # Fall back to the notebook globals so existing ``optimizer_scheduler(model)``
    # calls behave exactly as before; explicit arguments remove the dependency
    # on cell execution order.
    if data_loader is None:
        data_loader = total_train_data_loader
    if epochs is None:
        epochs = EPOCHS

    # correct_bias=False turns off Adam bias correction in the transformers
    # AdamW implementation.
    # NOTE(review): transformers' AdamW is deprecated in favour of
    # torch.optim.AdamW, which has no correct_bias switch -- migrating would
    # change the optimization behaviour, so it is left as is.
    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)

    # Total number of scheduler steps over the whole run.
    # Assumes scheduler.step() is called once per batch inside train_epoch --
    # TODO confirm against train.py.
    total_steps = len(data_loader) * epochs

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=total_steps
    )

    return optimizer, scheduler


In [7]:
from collections import defaultdict

# Number of passes over the merged train+dev loader.
EPOCHS = 3

# Instantiate the classifier with one output per class and move it to the
# training device, then build its optimizer / LR schedule.
model = SentimentClassifierUntrainedCLSMultiLastLayersMultiLinear(len(class_names)).to(DEVICE)
optimizer, scheduler = optimizer_scheduler(model)

with mlflow.start_run():
    mlflow.set_tag("Model class", "SentimentClassifierUntrainedCLSMultiLastLayersMultiLinear")

    # Per-epoch curves kept in memory, plus the best validation score so far.
    history = defaultdict(list)
    best_accuracy, best_epoch = 0, 0

    for epoch in range(EPOCHS):

        # Progress banner for this epoch.
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        print("-" * 10)

        # One optimization pass over the merged train+dev loader.
        train_acc, train_loss = train_epoch(
            model, total_train_data_loader, loss_fn, optimizer,
            DEVICE, scheduler, train_size, epoch
        )
        print(f"Train loss {train_loss} accuracy {train_acc}")

        # Score the current weights on the dev loader.
        # NOTE(review): df_dev was concatenated into the training frame earlier
        # in this notebook; if dev_data_loader iterates df_dev, these numbers
        # are measured on data the model was trained on -- confirm.
        val_acc, val_loss = eval_model(model, dev_data_loader, loss_fn, DEVICE, dev_size)
        print(f"Val   loss {val_loss} accuracy {val_acc}")
        print()

        # Send this epoch's metrics to MLflow, in the same order as before.
        for metric_name, metric_value in (
            ("Train Loss", train_loss),
            ("Val  Loss", val_loss),
            ("Train Accuracy", train_acc),
            ("Val Accuracy", val_acc),
        ):
            mlflow.log_metric(metric_name, metric_value, step=epoch)

        # Mirror the same numbers into the in-memory history.
        for history_key, epoch_value in (
            ("train_acc", train_acc),
            ("train_loss", train_loss),
            ("val_acc", val_acc),
            ("val_loss", val_loss),
        ):
            history[history_key].append(epoch_value)

        # Checkpoint to MLflow only on a strict improvement of validation
        # accuracy; ties with an earlier epoch are not logged again.
        if val_acc > best_accuracy:
            best_accuracy, best_epoch = val_acc, epoch
            mlflow.pytorch.log_model(
                model,
                artifact_path="{}-{}".format(best_epoch, best_accuracy),
            )
                


Epoch 1/3
----------
Train loss 0.5344890259720129 accuracy 0.6844444444444444
Val   loss 0.11130647361278534 accuracy 0.96

Epoch 2/3
----------
Train loss 0.14699410299513618 accuracy 0.9355555555555556
Val   loss 0.011812818835356407 accuracy 1.0

Epoch 3/3
----------
Train loss 0.030995305675756316 accuracy 0.9911111111111112
Val   loss 0.00349374017345586 accuracy 1.0



Inference on test set:

In [10]:
from train import eval_model

# Evaluate the in-memory model on the test loader; the results are bound to
# `acc` and `loss` but not displayed by this cell.
# NOTE(review): `model` holds the weights from the final training epoch, which
# is not necessarily the checkpoint logged to MLflow (that one is only written
# on a strict validation-accuracy improvement).
acc, loss = eval_model(model, test_data_loader, loss_fn, DEVICE, len(df_test))

# Confusion matrix

In [15]:
from inference import get_predictions

# Run the model over the test loader and unpack the four values returned by
# get_predictions.
# NOTE(review): judging by the names these are the raw texts, predicted labels,
# predicted probabilities and gold labels -- confirm against inference.py.
y_texts, y_pred, y_pred_probs, y_test = get_predictions(
    model,
    test_data_loader,
    DEVICE
)


Accuracy: 0.9700


In [17]:
from sklearn.metrics import confusion_matrix, classification_report

# Per-class precision / recall / F1 of the test predictions against the
# reference labels, followed by the raw confusion counts.
test_report = classification_report(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)

print(test_report)
print(test_confusion)


              precision    recall  f1-score   support

           0       0.98      0.96      0.97        50
           1       0.96      0.98      0.97        50

    accuracy                           0.97       100
   macro avg       0.97      0.97      0.97       100
weighted avg       0.97      0.97      0.97       100

[[48  2]
 [ 1 49]]


In [16]:
import torch
import gc

# Release the trained model's memory.
# The optimizer was built from model.parameters() and the scheduler wraps the
# optimizer, so deleting `model` alone leaves the parameters reachable; drop
# all three notebook-level references together.
del model, optimizer, scheduler

# Collect unreachable objects first so any tensors they still hold are freed,
# and only then ask the CUDA caching allocator to hand its now-unused blocks
# back to the device.
gc.collect()
torch.cuda.empty_cache()

4565