# Imports

In [1]:
# <your imports>
import numpy as np
import torch
from torch.optim import Adam
import torch.nn as nn
from tqdm.notebook import tqdm
from omegaconf import OmegaConf
import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split
from transformers import TrainingArguments, Trainer
from transformers import BertForSequenceClassification, XLMRobertaXLConfig
from transformers import EarlyStoppingCallback

from preprocessing import Preprocessing
from matplotlib import pyplot as plt

# models
from models import BertClassifier
from transformers import BertTokenizer

# ignore fucking warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
datapath = 'data/data_ruSentNE_lemmatized.csv'
df = pd.read_csv(datapath)
mapping = {
    -1: 0,
    0: 1,
    1: 2
}

df["label"] = df["category"].map(mapping)
        
print(f"Unique labels: {np.unique(df.label)}")
# df.head()

Unique labels: [0 1 2]


In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
labels = {
    -1: 0,
    0: 1,
    1: 2
}

class Dataset(torch.utils.data.Dataset):

    def __init__(self, df, equalize=False):

        self.labels = df['label'].values
        self.texts = [tokenizer(text, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for text in df['text']]
        if equalize:
            _, counts = np.unique(df['label'].values, return_counts=True)
            
    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

In [4]:
np.random.seed(112)
df_train, df_val = train_test_split(
                df,
                test_size=0.2,
                random_state=42,
                shuffle=True,
                stratify=df["label"],
            )

print(len(df_train),len(df_val))

7585 1897


In [19]:
np.unique(df_train.label, return_counts=True)

(array([0, 1, 2]), array([1156, 5455,  974]))

In [14]:
with open("metric_test.txt", 'w'):
    pass

def custom_f1(pred, labels):
    pred = np.argmax(pred.cpu().numpy(), axis=1)
    with open("metric_test.txt", 'a') as f:
        f.write(" ".join(map(str, labels.cpu().numpy())))
        f.write("\n")
        f.write(" ".join(map(str, pred)))
        f.write("\n")
        f.write("\n")
    f1_scores = f1_score(y_true=labels.cpu().numpy(), y_pred=pred, average=None)
    final_score = np.mean(f1_scores[1:])
    # return {"custom F1 score": final_score}
    return {
        "F1 for class 0": f1_scores[0],
        "F1 for class 1": f1_scores[1],
        "F1 for class 2": f1_scores[2]
    }

In [15]:
# for i in train_dl:
#     print(i)
#     break

In [17]:
def train(model, train_data, val_data, learning_rate, epochs):
    train, val = Dataset(train_data), Dataset(val_data)
    
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=64)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)
    if use_cuda:

            model = model.cuda()
            criterion = criterion.cuda()
    
    for epoch_num in tqdm(range(epochs)):

            total_acc_train = 0
            total_loss_train = 0
            count=0
            for train_input, train_label in tqdm(train_dataloader):
                count+=1
                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
                if count >= 20:
                    break
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
                    
                    metrics = custom_f1(output, val_label)
            print(
                f"Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
                \n| Val Loss: {total_loss_val / len(val_data): .3f} \
                \n| F1 for 0 class: {metrics['F1 for class 0']} \
                \n| F1 for 1 class: {metrics['F1 for class 1']} \
                \n| F1 for 2 class: {metrics['F1 for class 2']} \
                \n| Train Accuracy: {total_acc_train / len(train_data): .3f} \
                \n| Val Accuracy: {total_acc_val / len(val_data): .3f}")
                  
EPOCHS = 5
model = BertClassifier(
    num_classes=3
)
LR = 1e-6

train(model, df_train, df_val, LR, EPOCHS)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/238 [00:00<?, ?it/s]

Epochs: 1 | Train Loss:  0.003                 
| Val Loss:  0.017                 
| F1 for 0 class: 0.28571428571428575                 
| F1 for 1 class: 0.368421052631579                 
| F1 for 2 class: 0.25                 
| Train Accuracy:  0.021                 
| Val Accuracy:  0.271


  0%|          | 0/238 [00:00<?, ?it/s]

Epochs: 2 | Train Loss:  0.003                 
| Val Loss:  0.017                 
| F1 for 0 class: 0.25806451612903225                 
| F1 for 1 class: 0.42105263157894735                 
| F1 for 2 class: 0.0                 
| Train Accuracy:  0.026                 
| Val Accuracy:  0.326


  0%|          | 0/238 [00:00<?, ?it/s]

Epochs: 3 | Train Loss:  0.003                 
| Val Loss:  0.017                 
| F1 for 0 class: 0.375                 
| F1 for 1 class: 0.4                 
| F1 for 2 class: 0.2                 
| Train Accuracy:  0.029                 
| Val Accuracy:  0.376


  0%|          | 0/238 [00:00<?, ?it/s]

Epochs: 4 | Train Loss:  0.003                 
| Val Loss:  0.016                 
| F1 for 0 class: 0.1739130434782609                 
| F1 for 1 class: 0.627450980392157                 
| F1 for 2 class: 0.0                 
| Train Accuracy:  0.038                 
| Val Accuracy:  0.467


  0%|          | 0/238 [00:00<?, ?it/s]

Epochs: 5 | Train Loss:  0.003                 
| Val Loss:  0.015                 
| F1 for 0 class: 0.13333333333333333                 
| F1 for 1 class: 0.7000000000000001                 
| F1 for 2 class: 0.0                 
| Train Accuracy:  0.047                 
| Val Accuracy:  0.623
