In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json


In [2]:
import torch
import torch.nn as nn
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.metrics import accuracy_score, f1_score, classification_report
from torch.utils.data import DataLoader
from tqdm import tqdm
import pandas as pd
import json
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

2025-04-24 18:41:54.253146: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745520114.453637      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745520114.512685      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class FocalLossWithWeights(nn.Module):
    def __init__(self, weight=None, gamma=2.0):
        super(FocalLossWithWeights, self).__init__()
        self.weight = weight
        self.gamma = gamma
        self.ce = nn.CrossEntropyLoss(reduction='none', weight=weight)

    def forward(self, inputs, targets):
        logp = self.ce(inputs, targets)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()


In [5]:
# ---------- Dataset Definition ----------
class TutorEvalSingleTaskDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        return {
            'input_ids': self.encodings['input_ids'][idx],
            'attention_mask': self.encodings['attention_mask'][idx],
            'labels': self.labels[idx]
        }

    def __len__(self):
        return len(self.labels)

In [6]:
# ---------- Preprocessing ----------
def load_and_flatten(json_path):
    with open(json_path) as f:
        data = json.load(f)

    rows = []
    for instance in data:
        convo_id = instance["conversation_id"]
        history = instance["conversation_history"]
        for tutor_id, tutor_data in instance["tutor_responses"].items():
            row = {
                "conversation_id": convo_id,
                "tutor_id": tutor_id,
                "conversation_history": history,
                "tutor_response": tutor_data["response"],
                "Mistake_Identification": tutor_data["annotation"]["Mistake_Identification"],
                "Mistake_Location": tutor_data["annotation"]["Mistake_Location"],
                "Pedagogical_Guidance": tutor_data["annotation"]["Providing_Guidance"],
                "Actionability": tutor_data["annotation"]["Actionability"]
            }
            rows.append(row)
    return pd.DataFrame(rows)

In [7]:
def build_input_text(row):
    return f"Context:\n{row['conversation_history']}\n\nTutor Response:\n{row['tutor_response']}"

LABEL_MAP = {"Yes": 0, "To some extent": 1, "No": 2}
MERGED_LABEL_MAP = {"Yes": 1, "To some extent": 1, "No": 0}

def encode_labels(df):
    for task in ["Mistake_Identification", "Mistake_Location", "Pedagogical_Guidance", "Actionability"]:
        df[f"{task}_label"] = df[task].map(LABEL_MAP)
        df[f"{task}_binary"] = df[task].map(MERGED_LABEL_MAP)
    return df

def tokenize_inputs(tokenizer, texts, max_length=256):
    return tokenizer(
        texts,
        add_special_tokens=True,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt"
    )

In [8]:
def preprocess_dataset(json_path, task_label):
    df = load_and_flatten(json_path)
    df["input_text"] = df.apply(build_input_text, axis=1)
    df = encode_labels(df)

    train_df, val_df = train_test_split(df, test_size=0.1, stratify=df[task_label], random_state=42)

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
    train_enc = tokenize_inputs(tokenizer, train_df["input_text"].tolist())
    val_enc = tokenize_inputs(tokenizer, val_df["input_text"].tolist())

    train_labels = torch.tensor(train_df[task_label].tolist())
    val_labels = torch.tensor(val_df[task_label].tolist())

    train_dataset = TutorEvalSingleTaskDataset(train_enc, train_labels)
    val_dataset = TutorEvalSingleTaskDataset(val_enc, val_labels)

    return train_dataset, val_dataset, tokenizer, df


In [9]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

def get_class_weights(labels, num_classes):
    class_weights = compute_class_weight(class_weight='balanced', classes=np.arange(num_classes), y=labels)
    return torch.tensor(class_weights, dtype=torch.float).to(device)


In [10]:
# ---------- Model ----------
class SingleTaskBertClassifier(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

    def forward(self, input_ids, attention_mask):
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits

In [11]:
# ---------- Training ----------
def evaluate_model(model, val_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].cpu().numpy()

            outputs = model(input_ids, attention_mask)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels)

    acc = accuracy_score(all_labels, all_preds)
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    print(f"Validation Accuracy: {acc:.4f}  Validation Macro F1: {macro_f1:.4f}")
    print(classification_report(all_labels, all_preds, target_names=["Yes", "To some extent", "No"], zero_division=0))

def train_model(loss_type, train_loader, val_loader, num_labels, epochs=15):
    print(f"\n🔁 Training with: {loss_type.upper()} Loss\n")

    model = SingleTaskBertClassifier(num_labels).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

    train_labels_list = [label.item() for batch in train_loader for label in batch['labels']]
    class_weights = get_class_weights(train_labels_list, num_labels)

    if loss_type == "focal":
        criterion = FocalLossWithWeights(class_weights)
    elif loss_type == "smoothing":
        criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.1)
    else:
        criterion = nn.CrossEntropyLoss(weight=class_weights)


    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1} Loss: {avg_loss:.4f}")
        evaluate_model(model, val_loader)



In [12]:
# ---------- Run ----------
json_path = "/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json"
train_dataset, val_dataset, tokenizer, df = preprocess_dataset(json_path, "Mistake_Location_label")
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [13]:
df

Unnamed: 0,conversation_id,tutor_id,conversation_history,tutor_response,Mistake_Identification,Mistake_Location,Pedagogical_Guidance,Actionability,input_text,Mistake_Identification_label,Mistake_Identification_binary,Mistake_Location_label,Mistake_Location_binary,Pedagogical_Guidance_label,Pedagogical_Guidance_binary,Actionability_label,Actionability_binary
0,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Sonnet,"Tutor: Hi, could you please provide a step-by-...","Great, you've correctly identified the cost of...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
1,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Llama318B,"Tutor: Hi, could you please provide a step-by-...",Now that we know the cost of 1 pound of meat i...,Yes,To some extent,To some extent,To some extent,"Context:\nTutor: Hi, could you please provide ...",0,1,1,1,1,1,1,1
2,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Llama31405B,"Tutor: Hi, could you please provide a step-by-...","You're close, but I notice that you calculated...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
3,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,GPT4,"Tutor: Hi, could you please provide a step-by-...","That's correct. So, if 1 pound of meat costs $...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
4,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Mistral,"Tutor: Hi, could you please provide a step-by-...",It seems like you've calculated the cost as if...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2471,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Mistral,"Tutor: Hi, could you please provide a step-by-...",It seems there might be a misunderstanding in ...,Yes,Yes,To some extent,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,1,1,0,1
2472,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Phi3,"Tutor: Hi, could you please provide a step-by-...","To solve this problem, we need to add the numb...",No,No,No,No,"Context:\nTutor: Hi, could you please provide ...",2,0,2,0,2,0,2,0
2473,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Sonnet,"Tutor: Hi, could you please provide a step-by-...",That's a great start and I like how you worked...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
2474,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Expert,"Tutor: Hi, could you please provide a step-by-...",Okay. So Hector gave 5 less than four times as...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1


In [14]:
for loss_name in ["ce", "smoothing", "focal"]:
    train_model(loss_name, train_loader, val_loader, num_labels=3)


🔁 Training with: CE Loss



Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [01:56<00:00,  4.77it/s]


Epoch 1 Loss: 1.0364
Validation Accuracy: 0.5968  Validation Macro F1: 0.3697
                precision    recall  f1-score   support

           Yes       0.68      0.78      0.73       155
To some extent       0.00      0.00      0.00        22
            No       0.38      0.38      0.38        71

      accuracy                           0.60       248
     macro avg       0.35      0.39      0.37       248
  weighted avg       0.54      0.60      0.56       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 2 Loss: 0.9692
Validation Accuracy: 0.4597  Validation Macro F1: 0.3450
                precision    recall  f1-score   support

           Yes       0.82      0.32      0.46       155
To some extent       0.20      0.05      0.07        22
            No       0.35      0.89      0.50        71

      accuracy                           0.46       248
     macro avg       0.46      0.42      0.35       248
  weighted avg       0.63      0.46      0.44       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 3 Loss: 0.8921
Validation Accuracy: 0.5645  Validation Macro F1: 0.4363
                precision    recall  f1-score   support

           Yes       0.70      0.70      0.70       155
To some extent       0.15      0.27      0.19        22
            No       0.49      0.37      0.42        71

      accuracy                           0.56       248
     macro avg       0.45      0.45      0.44       248
  weighted avg       0.59      0.56      0.57       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 4 Loss: 0.8394
Validation Accuracy: 0.5444  Validation Macro F1: 0.4007
                precision    recall  f1-score   support

           Yes       0.77      0.55      0.64       155
To some extent       0.06      0.05      0.05        22
            No       0.40      0.69      0.51        71

      accuracy                           0.54       248
     macro avg       0.41      0.43      0.40       248
  weighted avg       0.60      0.54      0.55       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.7851
Validation Accuracy: 0.4556  Validation Macro F1: 0.4033
                precision    recall  f1-score   support

           Yes       0.74      0.49      0.59       155
To some extent       0.17      0.73      0.27        22
            No       0.42      0.30      0.35        71

      accuracy                           0.46       248
     macro avg       0.44      0.50      0.40       248
  weighted avg       0.60      0.46      0.49       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 6 Loss: 0.7396
Validation Accuracy: 0.5161  Validation Macro F1: 0.4487
                precision    recall  f1-score   support

           Yes       0.75      0.54      0.62       155
To some extent       0.20      0.45      0.28        22
            No       0.40      0.49      0.44        71

      accuracy                           0.52       248
     macro avg       0.45      0.49      0.45       248
  weighted avg       0.60      0.52      0.54       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 7 Loss: 0.7161
Validation Accuracy: 0.5242  Validation Macro F1: 0.4520
                precision    recall  f1-score   support

           Yes       0.75      0.57      0.65       155
To some extent       0.20      0.55      0.29        22
            No       0.41      0.41      0.41        71

      accuracy                           0.52       248
     macro avg       0.46      0.51      0.45       248
  weighted avg       0.61      0.52      0.55       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 8 Loss: 0.6859
Validation Accuracy: 0.5766  Validation Macro F1: 0.4703
                precision    recall  f1-score   support

           Yes       0.71      0.66      0.68       155
To some extent       0.29      0.27      0.28        22
            No       0.42      0.48      0.45        71

      accuracy                           0.58       248
     macro avg       0.47      0.47      0.47       248
  weighted avg       0.59      0.58      0.58       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 9 Loss: 0.6868
Validation Accuracy: 0.5323  Validation Macro F1: 0.4061
                precision    recall  f1-score   support

           Yes       0.68      0.64      0.66       155
To some extent       0.15      0.18      0.16        22
            No       0.39      0.41      0.40        71

      accuracy                           0.53       248
     macro avg       0.40      0.41      0.41       248
  weighted avg       0.55      0.53      0.54       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 10 Loss: 0.6530
Validation Accuracy: 0.5282  Validation Macro F1: 0.4620
                precision    recall  f1-score   support

           Yes       0.75      0.56      0.64       155
To some extent       0.24      0.50      0.32        22
            No       0.38      0.46      0.42        71

      accuracy                           0.53       248
     macro avg       0.46      0.51      0.46       248
  weighted avg       0.60      0.53      0.55       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.6742
Validation Accuracy: 0.5726  Validation Macro F1: 0.4345
                precision    recall  f1-score   support

           Yes       0.69      0.72      0.70       155
To some extent       0.18      0.23      0.20        22
            No       0.45      0.37      0.40        71

      accuracy                           0.57       248
     macro avg       0.44      0.44      0.43       248
  weighted avg       0.57      0.57      0.57       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.6435
Validation Accuracy: 0.5363  Validation Macro F1: 0.4361
                precision    recall  f1-score   support

           Yes       0.73      0.63      0.67       155
To some extent       0.17      0.36      0.23        22
            No       0.41      0.39      0.40        71

      accuracy                           0.54       248
     macro avg       0.44      0.46      0.44       248
  weighted avg       0.59      0.54      0.56       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 13 Loss: 0.6336
Validation Accuracy: 0.5000  Validation Macro F1: 0.4041
                precision    recall  f1-score   support

           Yes       0.72      0.58      0.64       155
To some extent       0.14      0.32      0.19        22
            No       0.38      0.38      0.38        71

      accuracy                           0.50       248
     macro avg       0.41      0.43      0.40       248
  weighted avg       0.57      0.50      0.53       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 14 Loss: 0.6134
Validation Accuracy: 0.5766  Validation Macro F1: 0.4838
                precision    recall  f1-score   support

           Yes       0.75      0.60      0.67       155
To some extent       0.29      0.27      0.28        22
            No       0.43      0.62      0.51        71

      accuracy                           0.58       248
     macro avg       0.49      0.50      0.48       248
  weighted avg       0.62      0.58      0.59       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 15 Loss: 0.6274
Validation Accuracy: 0.5806  Validation Macro F1: 0.4590
                precision    recall  f1-score   support

           Yes       0.74      0.70      0.72       155
To some extent       0.20      0.32      0.25        22
            No       0.43      0.39      0.41        71

      accuracy                           0.58       248
     macro avg       0.46      0.47      0.46       248
  weighted avg       0.60      0.58      0.59       248


🔁 Training with: SMOOTHING Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 1 Loss: 1.2069
Validation Accuracy: 0.3508  Validation Macro F1: 0.2908
                precision    recall  f1-score   support

           Yes       0.77      0.42      0.54       155
To some extent       0.11      0.73      0.18        22
            No       0.50      0.08      0.14        71

      accuracy                           0.35       248
     macro avg       0.46      0.41      0.29       248
  weighted avg       0.64      0.35      0.40       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 2 Loss: 1.1643
Validation Accuracy: 0.3145  Validation Macro F1: 0.3087
                precision    recall  f1-score   support

           Yes       0.79      0.27      0.40       155
To some extent       0.11      0.73      0.19        22
            No       0.40      0.28      0.33        71

      accuracy                           0.31       248
     macro avg       0.43      0.43      0.31       248
  weighted avg       0.62      0.31      0.36       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 3 Loss: 1.1100
Validation Accuracy: 0.4879  Validation Macro F1: 0.4169
                precision    recall  f1-score   support

           Yes       0.76      0.50      0.61       155
To some extent       0.17      0.36      0.23        22
            No       0.35      0.49      0.41        71

      accuracy                           0.49       248
     macro avg       0.43      0.45      0.42       248
  weighted avg       0.59      0.49      0.52       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 1.0768
Validation Accuracy: 0.4476  Validation Macro F1: 0.3947
                precision    recall  f1-score   support

           Yes       0.76      0.48      0.58       155
To some extent       0.12      0.50      0.19        22
            No       0.46      0.37      0.41        71

      accuracy                           0.45       248
     macro avg       0.45      0.45      0.39       248
  weighted avg       0.62      0.45      0.50       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 1.0547
Validation Accuracy: 0.4556  Validation Macro F1: 0.3952
                precision    recall  f1-score   support

           Yes       0.76      0.51      0.61       155
To some extent       0.13      0.59      0.22        22
            No       0.46      0.30      0.36        71

      accuracy                           0.46       248
     macro avg       0.45      0.47      0.40       248
  weighted avg       0.62      0.46      0.50       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 6 Loss: 1.0234
Validation Accuracy: 0.5363  Validation Macro F1: 0.4543
                precision    recall  f1-score   support

           Yes       0.76      0.60      0.67       155
To some extent       0.18      0.50      0.26        22
            No       0.45      0.41      0.43        71

      accuracy                           0.54       248
     macro avg       0.46      0.50      0.45       248
  weighted avg       0.62      0.54      0.57       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 7 Loss: 1.0164
Validation Accuracy: 0.5565  Validation Macro F1: 0.4766
                precision    recall  f1-score   support

           Yes       0.73      0.61      0.66       155
To some extent       0.23      0.45      0.30        22
            No       0.45      0.48      0.46        71

      accuracy                           0.56       248
     macro avg       0.47      0.51      0.48       248
  weighted avg       0.61      0.56      0.57       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 8 Loss: 0.9986
Validation Accuracy: 0.5282  Validation Macro F1: 0.4607
                precision    recall  f1-score   support

           Yes       0.79      0.53      0.63       155
To some extent       0.19      0.45      0.27        22
            No       0.42      0.55      0.48        71

      accuracy                           0.53       248
     macro avg       0.47      0.51      0.46       248
  weighted avg       0.63      0.53      0.56       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 9 Loss: 0.9784
Validation Accuracy: 0.5363  Validation Macro F1: 0.4658
                precision    recall  f1-score   support

           Yes       0.76      0.57      0.65       155
To some extent       0.20      0.55      0.29        22
            No       0.46      0.45      0.45        71

      accuracy                           0.54       248
     macro avg       0.47      0.52      0.47       248
  weighted avg       0.62      0.54      0.56       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 10 Loss: 0.9799
Validation Accuracy: 0.5444  Validation Macro F1: 0.4383
                precision    recall  f1-score   support

           Yes       0.71      0.66      0.68       155
To some extent       0.16      0.41      0.23        22
            No       0.48      0.34      0.40        71

      accuracy                           0.54       248
     macro avg       0.45      0.47      0.44       248
  weighted avg       0.60      0.54      0.56       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 11 Loss: 0.9768
Validation Accuracy: 0.5363  Validation Macro F1: 0.4458
                precision    recall  f1-score   support

           Yes       0.74      0.63      0.68       155
To some extent       0.17      0.50      0.26        22
            No       0.47      0.35      0.40        71

      accuracy                           0.54       248
     macro avg       0.46      0.49      0.45       248
  weighted avg       0.61      0.54      0.56       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 12 Loss: 0.9652
Validation Accuracy: 0.5363  Validation Macro F1: 0.4654
                precision    recall  f1-score   support

           Yes       0.75      0.56      0.64       155
To some extent       0.22      0.45      0.30        22
            No       0.41      0.51      0.46        71

      accuracy                           0.54       248
     macro avg       0.46      0.51      0.47       248
  weighted avg       0.61      0.54      0.56       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 13 Loss: 0.9614
Validation Accuracy: 0.5605  Validation Macro F1: 0.4649
                precision    recall  f1-score   support

           Yes       0.75      0.65      0.70       155
To some extent       0.21      0.45      0.29        22
            No       0.42      0.39      0.41        71

      accuracy                           0.56       248
     macro avg       0.46      0.50      0.46       248
  weighted avg       0.61      0.56      0.58       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 14 Loss: 0.9600
Validation Accuracy: 0.5444  Validation Macro F1: 0.4509
                precision    recall  f1-score   support

           Yes       0.74      0.64      0.69       155
To some extent       0.18      0.50      0.27        22
            No       0.46      0.35      0.40        71

      accuracy                           0.54       248
     macro avg       0.46      0.50      0.45       248
  weighted avg       0.61      0.54      0.57       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 15 Loss: 0.9529
Validation Accuracy: 0.5363  Validation Macro F1: 0.4279
                precision    recall  f1-score   support

           Yes       0.70      0.66      0.68       155
To some extent       0.17      0.41      0.24        22
            No       0.44      0.31      0.36        71

      accuracy                           0.54       248
     macro avg       0.44      0.46      0.43       248
  weighted avg       0.58      0.54      0.55       248


🔁 Training with: FOCAL Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 1 Loss: 0.5925
Validation Accuracy: 0.2661  Validation Macro F1: 0.2456
                precision    recall  f1-score   support

           Yes       0.82      0.26      0.39       155
To some extent       0.10      0.86      0.19        22
            No       0.39      0.10      0.16        71

      accuracy                           0.27       248
     macro avg       0.44      0.41      0.25       248
  weighted avg       0.63      0.27      0.31       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 2 Loss: 0.5485
Validation Accuracy: 0.2903  Validation Macro F1: 0.2512
                precision    recall  f1-score   support

           Yes       0.83      0.06      0.12       155
To some extent       0.17      0.32      0.22        22
            No       0.28      0.77      0.42        71

      accuracy                           0.29       248
     macro avg       0.43      0.39      0.25       248
  weighted avg       0.62      0.29      0.21       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 3 Loss: 0.4887
Validation Accuracy: 0.3024  Validation Macro F1: 0.2928
                precision    recall  f1-score   support

           Yes       0.85      0.26      0.40       155
To some extent       0.13      0.91      0.23        22
            No       0.33      0.21      0.26        71

      accuracy                           0.30       248
     macro avg       0.44      0.46      0.29       248
  weighted avg       0.64      0.30      0.34       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 0.4454
Validation Accuracy: 0.3387  Validation Macro F1: 0.3346
                precision    recall  f1-score   support

           Yes       0.88      0.25      0.38       155
To some extent       0.15      0.91      0.26        22
            No       0.35      0.37      0.36        71

      accuracy                           0.34       248
     macro avg       0.46      0.51      0.33       248
  weighted avg       0.67      0.34      0.37       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.4109
Validation Accuracy: 0.3871  Validation Macro F1: 0.3751
                precision    recall  f1-score   support

           Yes       0.86      0.28      0.42       155
To some extent       0.17      0.77      0.27        22
            No       0.38      0.51      0.43        71

      accuracy                           0.39       248
     macro avg       0.47      0.52      0.38       248
  weighted avg       0.66      0.39      0.41       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 6 Loss: 0.3904
Validation Accuracy: 0.3710  Validation Macro F1: 0.3599
                precision    recall  f1-score   support

           Yes       0.82      0.27      0.41       155
To some extent       0.17      0.77      0.27        22
            No       0.35      0.46      0.40        71

      accuracy                           0.37       248
     macro avg       0.45      0.50      0.36       248
  weighted avg       0.63      0.37      0.39       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 7 Loss: 0.3622
Validation Accuracy: 0.3589  Validation Macro F1: 0.3437
                precision    recall  f1-score   support

           Yes       0.80      0.28      0.42       155
To some extent       0.15      0.68      0.24        22
            No       0.33      0.42      0.37        71

      accuracy                           0.36       248
     macro avg       0.42      0.46      0.34       248
  weighted avg       0.61      0.36      0.39       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 8 Loss: 0.3618
Validation Accuracy: 0.3548  Validation Macro F1: 0.3413
                precision    recall  f1-score   support

           Yes       0.78      0.29      0.42       155
To some extent       0.16      0.77      0.26        22
            No       0.31      0.37      0.34        71

      accuracy                           0.35       248
     macro avg       0.42      0.48      0.34       248
  weighted avg       0.59      0.35      0.38       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 9 Loss: 0.3482
Validation Accuracy: 0.3750  Validation Macro F1: 0.3571
                precision    recall  f1-score   support

           Yes       0.79      0.32      0.45       155
To some extent       0.16      0.73      0.26        22
            No       0.34      0.39      0.36        71

      accuracy                           0.38       248
     macro avg       0.43      0.48      0.36       248
  weighted avg       0.60      0.38      0.41       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 10 Loss: 0.3327
Validation Accuracy: 0.3508  Validation Macro F1: 0.3403
                precision    recall  f1-score   support

           Yes       0.81      0.25      0.38       155
To some extent       0.16      0.73      0.26        22
            No       0.33      0.45      0.38        71

      accuracy                           0.35       248
     macro avg       0.43      0.48      0.34       248
  weighted avg       0.62      0.35      0.37       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 11 Loss: 0.3344
Validation Accuracy: 0.3952  Validation Macro F1: 0.3774
                precision    recall  f1-score   support

           Yes       0.80      0.32      0.45       155
To some extent       0.17      0.73      0.27        22
            No       0.36      0.46      0.40        71

      accuracy                           0.40       248
     macro avg       0.44      0.50      0.38       248
  weighted avg       0.62      0.40      0.42       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.3257
Validation Accuracy: 0.4274  Validation Macro F1: 0.3961
                precision    recall  f1-score   support

           Yes       0.79      0.41      0.54       155
To some extent       0.17      0.77      0.28        22
            No       0.37      0.37      0.37        71

      accuracy                           0.43       248
     macro avg       0.44      0.52      0.40       248
  weighted avg       0.61      0.43      0.47       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 13 Loss: 0.3265
Validation Accuracy: 0.3548  Validation Macro F1: 0.3409
                precision    recall  f1-score   support

           Yes       0.78      0.25      0.37       155
To some extent       0.15      0.64      0.24        22
            No       0.34      0.51      0.41        71

      accuracy                           0.35       248
     macro avg       0.42      0.46      0.34       248
  weighted avg       0.60      0.35      0.37       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 14 Loss: 0.3039
Validation Accuracy: 0.3871  Validation Macro F1: 0.3652
                precision    recall  f1-score   support

           Yes       0.79      0.30      0.43       155
To some extent       0.15      0.59      0.24        22
            No       0.36      0.52      0.43        71

      accuracy                           0.39       248
     macro avg       0.43      0.47      0.37       248
  weighted avg       0.61      0.39      0.41       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 15 Loss: 0.3268
Validation Accuracy: 0.3589  Validation Macro F1: 0.3460
                precision    recall  f1-score   support

           Yes       0.80      0.28      0.41       155
To some extent       0.16      0.73      0.26        22
            No       0.33      0.42      0.37        71

      accuracy                           0.36       248
     macro avg       0.43      0.48      0.35       248
  weighted avg       0.61      0.36      0.39       248



In [15]:
# ---------- Run ----------
json_path = "/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json"
train_dataset, val_dataset, tokenizer, df = preprocess_dataset(json_path, "Pedagogical_Guidance_label")
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)

In [16]:
for loss_name in ["ce", "smoothing", "focal"]:
    train_model(loss_name, train_loader, val_loader, num_labels=3)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔁 Training with: CE Loss



Epoch 1/15: 100%|██████████| 557/557 [02:05<00:00,  4.43it/s]


Epoch 1 Loss: 1.0905
Validation Accuracy: 0.3750  Validation Macro F1: 0.3498
                precision    recall  f1-score   support

           Yes       0.67      0.30      0.41       141
To some extent       0.24      0.84      0.38        50
            No       0.75      0.16      0.26        57

      accuracy                           0.38       248
     macro avg       0.55      0.43      0.35       248
  weighted avg       0.60      0.38      0.37       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 2 Loss: 1.0394
Validation Accuracy: 0.5887  Validation Macro F1: 0.4179
                precision    recall  f1-score   support

           Yes       0.62      0.89      0.73       141
To some extent       0.34      0.24      0.28        50
            No       0.80      0.14      0.24        57

      accuracy                           0.59       248
     macro avg       0.59      0.42      0.42       248
  weighted avg       0.61      0.59      0.53       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 3 Loss: 0.9659
Validation Accuracy: 0.5726  Validation Macro F1: 0.4410
                precision    recall  f1-score   support

           Yes       0.62      0.82      0.71       141
To some extent       0.41      0.18      0.25        50
            No       0.44      0.32      0.37        57

      accuracy                           0.57       248
     macro avg       0.49      0.44      0.44       248
  weighted avg       0.54      0.57      0.54       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 0.8934
Validation Accuracy: 0.5121  Validation Macro F1: 0.4535
                precision    recall  f1-score   support

           Yes       0.66      0.61      0.63       141
To some extent       0.29      0.50      0.37        50
            No       0.48      0.28      0.36        57

      accuracy                           0.51       248
     macro avg       0.48      0.46      0.45       248
  weighted avg       0.55      0.51      0.52       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.8332
Validation Accuracy: 0.4879  Validation Macro F1: 0.4712
                precision    recall  f1-score   support

           Yes       0.68      0.46      0.55       141
To some extent       0.33      0.52      0.40        50
            No       0.42      0.53      0.47        57

      accuracy                           0.49       248
     macro avg       0.47      0.50      0.47       248
  weighted avg       0.55      0.49      0.50       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 6 Loss: 0.8047
Validation Accuracy: 0.5323  Validation Macro F1: 0.4557
                precision    recall  f1-score   support

           Yes       0.65      0.67      0.66       141
To some extent       0.29      0.46      0.36        50
            No       0.58      0.25      0.35        57

      accuracy                           0.53       248
     macro avg       0.51      0.46      0.46       248
  weighted avg       0.56      0.53      0.53       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 7 Loss: 0.7604
Validation Accuracy: 0.5121  Validation Macro F1: 0.4764
                precision    recall  f1-score   support

           Yes       0.66      0.56      0.61       141
To some extent       0.30      0.58      0.39        50
            No       0.59      0.33      0.43        57

      accuracy                           0.51       248
     macro avg       0.52      0.49      0.48       248
  weighted avg       0.57      0.51      0.52       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 8 Loss: 0.7161
Validation Accuracy: 0.5282  Validation Macro F1: 0.4557
                precision    recall  f1-score   support

           Yes       0.64      0.67      0.66       141
To some extent       0.26      0.36      0.31        50
            No       0.56      0.32      0.40        57

      accuracy                           0.53       248
     macro avg       0.49      0.45      0.46       248
  weighted avg       0.55      0.53      0.53       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 9 Loss: 0.7091
Validation Accuracy: 0.5202  Validation Macro F1: 0.4880
                precision    recall  f1-score   support

           Yes       0.69      0.55      0.61       141
To some extent       0.31      0.52      0.39        50
            No       0.48      0.44      0.46        57

      accuracy                           0.52       248
     macro avg       0.49      0.50      0.49       248
  weighted avg       0.57      0.52      0.53       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 10 Loss: 0.6939
Validation Accuracy: 0.5242  Validation Macro F1: 0.4878
                precision    recall  f1-score   support

           Yes       0.72      0.55      0.62       141
To some extent       0.32      0.40      0.36        50
            No       0.42      0.58      0.49        57

      accuracy                           0.52       248
     macro avg       0.49      0.51      0.49       248
  weighted avg       0.57      0.52      0.54       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.6623
Validation Accuracy: 0.5282  Validation Macro F1: 0.4956
                precision    recall  f1-score   support

           Yes       0.75      0.54      0.63       141
To some extent       0.30      0.42      0.35        50
            No       0.45      0.60      0.51        57

      accuracy                           0.53       248
     macro avg       0.50      0.52      0.50       248
  weighted avg       0.59      0.53      0.54       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.6427
Validation Accuracy: 0.5806  Validation Macro F1: 0.4691
                precision    recall  f1-score   support

           Yes       0.64      0.81      0.71       141
To some extent       0.30      0.30      0.30        50
            No       0.79      0.26      0.39        57

      accuracy                           0.58       248
     macro avg       0.58      0.46      0.47       248
  weighted avg       0.60      0.58      0.56       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 13 Loss: 0.6625
Validation Accuracy: 0.5323  Validation Macro F1: 0.4691
                precision    recall  f1-score   support

           Yes       0.65      0.65      0.65       141
To some extent       0.30      0.40      0.34        50
            No       0.50      0.35      0.41        57

      accuracy                           0.53       248
     macro avg       0.48      0.47      0.47       248
  weighted avg       0.54      0.53      0.53       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 14 Loss: 0.6259
Validation Accuracy: 0.5242  Validation Macro F1: 0.4844
                precision    recall  f1-score   support

           Yes       0.69      0.56      0.62       141
To some extent       0.32      0.38      0.35        50
            No       0.43      0.56      0.48        57

      accuracy                           0.52       248
     macro avg       0.48      0.50      0.48       248
  weighted avg       0.56      0.52      0.53       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 15 Loss: 0.6465
Validation Accuracy: 0.5403  Validation Macro F1: 0.4853
                precision    recall  f1-score   support

           Yes       0.67      0.64      0.65       141
To some extent       0.31      0.40      0.35        50
            No       0.49      0.42      0.45        57

      accuracy                           0.54       248
     macro avg       0.49      0.49      0.49       248
  weighted avg       0.55      0.54      0.55       248


🔁 Training with: SMOOTHING Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 1 Loss: 1.1345
Validation Accuracy: 0.4758  Validation Macro F1: 0.3927
                precision    recall  f1-score   support

           Yes       0.62      0.59      0.60       141
To some extent       0.26      0.56      0.36        50
            No       0.88      0.12      0.22        57

      accuracy                           0.48       248
     macro avg       0.59      0.42      0.39       248
  weighted avg       0.61      0.48      0.47       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 2 Loss: 1.1039
Validation Accuracy: 0.4798  Validation Macro F1: 0.4163
                precision    recall  f1-score   support

           Yes       0.62      0.59      0.60       141
To some extent       0.26      0.46      0.34        50
            No       0.48      0.23      0.31        57

      accuracy                           0.48       248
     macro avg       0.46      0.43      0.42       248
  weighted avg       0.52      0.48      0.48       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 3 Loss: 1.0702
Validation Accuracy: 0.4274  Validation Macro F1: 0.4188
                precision    recall  f1-score   support

           Yes       0.72      0.35      0.47       141
To some extent       0.30      0.46      0.37        50
            No       0.33      0.60      0.42        57

      accuracy                           0.43       248
     macro avg       0.45      0.47      0.42       248
  weighted avg       0.55      0.43      0.44       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 1.0111
Validation Accuracy: 0.5484  Validation Macro F1: 0.4487
                precision    recall  f1-score   support

           Yes       0.64      0.74      0.68       141
To some extent       0.30      0.40      0.34        50
            No       0.67      0.21      0.32        57

      accuracy                           0.55       248
     macro avg       0.53      0.45      0.45       248
  weighted avg       0.58      0.55      0.53       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 5 Loss: 0.9594
Validation Accuracy: 0.5202  Validation Macro F1: 0.4878
                precision    recall  f1-score   support

           Yes       0.70      0.55      0.61       141
To some extent       0.33      0.60      0.42        50
            No       0.48      0.39      0.43        57

      accuracy                           0.52       248
     macro avg       0.50      0.51      0.49       248
  weighted avg       0.57      0.52      0.53       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 6 Loss: 0.9328
Validation Accuracy: 0.5766  Validation Macro F1: 0.5028
                precision    recall  f1-score   support

           Yes       0.67      0.72      0.70       141
To some extent       0.33      0.36      0.34        50
            No       0.56      0.40      0.47        57

      accuracy                           0.58       248
     macro avg       0.52      0.50      0.50       248
  weighted avg       0.58      0.58      0.57       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 7 Loss: 0.8975
Validation Accuracy: 0.5081  Validation Macro F1: 0.4475
                precision    recall  f1-score   support

           Yes       0.68      0.61      0.64       141
To some extent       0.27      0.34      0.30        50
            No       0.40      0.40      0.40        57

      accuracy                           0.51       248
     macro avg       0.45      0.45      0.45       248
  weighted avg       0.53      0.51      0.52       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 8 Loss: 0.8808
Validation Accuracy: 0.4798  Validation Macro F1: 0.4637
                precision    recall  f1-score   support

           Yes       0.74      0.44      0.55       141
To some extent       0.32      0.52      0.39        50
            No       0.38      0.54      0.45        57

      accuracy                           0.48       248
     macro avg       0.48      0.50      0.46       248
  weighted avg       0.57      0.48      0.50       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 9 Loss: 0.8646
Validation Accuracy: 0.5524  Validation Macro F1: 0.5112
                precision    recall  f1-score   support

           Yes       0.70      0.61      0.65       141
To some extent       0.34      0.56      0.42        50
            No       0.52      0.40      0.46        57

      accuracy                           0.55       248
     macro avg       0.52      0.52      0.51       248
  weighted avg       0.59      0.55      0.56       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 10 Loss: 0.8505
Validation Accuracy: 0.5282  Validation Macro F1: 0.4802
                precision    recall  f1-score   support

           Yes       0.69      0.60      0.64       141
To some extent       0.29      0.40      0.34        50
            No       0.46      0.46      0.46        57

      accuracy                           0.53       248
     macro avg       0.48      0.49      0.48       248
  weighted avg       0.56      0.53      0.54       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.8454
Validation Accuracy: 0.5444  Validation Macro F1: 0.5036
                precision    recall  f1-score   support

           Yes       0.71      0.60      0.65       141
To some extent       0.30      0.44      0.35        50
            No       0.52      0.49      0.50        57

      accuracy                           0.54       248
     macro avg       0.51      0.51      0.50       248
  weighted avg       0.58      0.54      0.56       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 12 Loss: 0.8568
Validation Accuracy: 0.5242  Validation Macro F1: 0.4943
                precision    recall  f1-score   support

           Yes       0.69      0.56      0.62       141
To some extent       0.30      0.54      0.39        50
            No       0.56      0.42      0.48        57

      accuracy                           0.52       248
     macro avg       0.52      0.51      0.49       248
  weighted avg       0.58      0.52      0.54       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 13 Loss: 0.8426
Validation Accuracy: 0.5202  Validation Macro F1: 0.4920
                precision    recall  f1-score   support

           Yes       0.69      0.54      0.61       141
To some extent       0.32      0.60      0.42        50
            No       0.51      0.40      0.45        57

      accuracy                           0.52       248
     macro avg       0.51      0.51      0.49       248
  weighted avg       0.58      0.52      0.53       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 14 Loss: 0.8233
Validation Accuracy: 0.5645  Validation Macro F1: 0.5017
                precision    recall  f1-score   support

           Yes       0.67      0.69      0.68       141
To some extent       0.32      0.44      0.37        50
            No       0.58      0.37      0.45        57

      accuracy                           0.56       248
     macro avg       0.53      0.50      0.50       248
  weighted avg       0.58      0.56      0.57       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 15 Loss: 0.8259
Validation Accuracy: 0.5605  Validation Macro F1: 0.5043
                precision    recall  f1-score   support

           Yes       0.68      0.66      0.67       141
To some extent       0.35      0.52      0.42        50
            No       0.54      0.35      0.43        57

      accuracy                           0.56       248
     macro avg       0.52      0.51      0.50       248
  weighted avg       0.58      0.56      0.56       248


🔁 Training with: FOCAL Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 1 Loss: 0.5429
Validation Accuracy: 0.2137  Validation Macro F1: 0.1545
                precision    recall  f1-score   support

           Yes       0.00      0.00      0.00       141
To some extent       0.20      0.98      0.33        50
            No       0.80      0.07      0.13        57

      accuracy                           0.21       248
     macro avg       0.33      0.35      0.15       248
  weighted avg       0.22      0.21      0.10       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 2 Loss: 0.5123
Validation Accuracy: 0.2863  Validation Macro F1: 0.2450
                precision    recall  f1-score   support

           Yes       1.00      0.06      0.11       141
To some extent       0.42      0.16      0.23        50
            No       0.25      0.96      0.40        57

      accuracy                           0.29       248
     macro avg       0.56      0.39      0.24       248
  weighted avg       0.71      0.29      0.20       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 3 Loss: 0.4623
Validation Accuracy: 0.3226  Validation Macro F1: 0.3098
                precision    recall  f1-score   support

           Yes       0.90      0.13      0.22       141
To some extent       0.33      0.30      0.31        50
            No       0.26      0.82      0.39        57

      accuracy                           0.32       248
     macro avg       0.49      0.42      0.31       248
  weighted avg       0.64      0.32      0.28       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 4 Loss: 0.4314
Validation Accuracy: 0.3992  Validation Macro F1: 0.4039
                precision    recall  f1-score   support

           Yes       0.85      0.20      0.32       141
To some extent       0.28      0.74      0.41        50
            No       0.40      0.60      0.48        57

      accuracy                           0.40       248
     macro avg       0.51      0.51      0.40       248
  weighted avg       0.63      0.40      0.38       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.3938
Validation Accuracy: 0.3145  Validation Macro F1: 0.3139
                precision    recall  f1-score   support

           Yes       0.88      0.05      0.09       141
To some extent       0.24      0.86      0.38        50
            No       0.45      0.49      0.47        57

      accuracy                           0.31       248
     macro avg       0.52      0.47      0.31       248
  weighted avg       0.65      0.31      0.24       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 6 Loss: 0.3710
Validation Accuracy: 0.4153  Validation Macro F1: 0.4211
                precision    recall  f1-score   support

           Yes       0.79      0.23      0.36       141
To some extent       0.29      0.80      0.43        50
            No       0.43      0.53      0.47        57

      accuracy                           0.42       248
     macro avg       0.50      0.52      0.42       248
  weighted avg       0.60      0.42      0.40       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 7 Loss: 0.3526
Validation Accuracy: 0.3911  Validation Macro F1: 0.3921
                precision    recall  f1-score   support

           Yes       0.74      0.23      0.35       141
To some extent       0.30      0.78      0.43        50
            No       0.36      0.46      0.40        57

      accuracy                           0.39       248
     macro avg       0.47      0.49      0.39       248
  weighted avg       0.56      0.39      0.38       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 8 Loss: 0.3301
Validation Accuracy: 0.4194  Validation Macro F1: 0.4184
                precision    recall  f1-score   support

           Yes       0.76      0.30      0.43       141
To some extent       0.31      0.62      0.41        50
            No       0.34      0.54      0.42        57

      accuracy                           0.42       248
     macro avg       0.47      0.49      0.42       248
  weighted avg       0.57      0.42      0.42       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 9 Loss: 0.3271
Validation Accuracy: 0.4073  Validation Macro F1: 0.4091
                precision    recall  f1-score   support

           Yes       0.84      0.22      0.35       141
To some extent       0.31      0.64      0.42        50
            No       0.36      0.67      0.46        57

      accuracy                           0.41       248
     macro avg       0.50      0.51      0.41       248
  weighted avg       0.62      0.41      0.39       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 10 Loss: 0.3152
Validation Accuracy: 0.4395  Validation Macro F1: 0.4370
                precision    recall  f1-score   support

           Yes       0.78      0.32      0.45       141
To some extent       0.32      0.56      0.41        50
            No       0.35      0.63      0.45        57

      accuracy                           0.44       248
     macro avg       0.48      0.50      0.44       248
  weighted avg       0.59      0.44      0.44       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.3083
Validation Accuracy: 0.4516  Validation Macro F1: 0.4385
                precision    recall  f1-score   support

           Yes       0.75      0.39      0.51       141
To some extent       0.29      0.72      0.42        50
            No       0.40      0.37      0.39        57

      accuracy                           0.45       248
     macro avg       0.48      0.49      0.44       248
  weighted avg       0.58      0.45      0.46       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.3059
Validation Accuracy: 0.4032  Validation Macro F1: 0.4038
                precision    recall  f1-score   support

           Yes       0.83      0.24      0.37       141
To some extent       0.33      0.54      0.41        50
            No       0.31      0.68      0.43        57

      accuracy                           0.40       248
     macro avg       0.49      0.49      0.40       248
  weighted avg       0.61      0.40      0.39       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 13 Loss: 0.2933
Validation Accuracy: 0.4798  Validation Macro F1: 0.4668
                precision    recall  f1-score   support

           Yes       0.72      0.43      0.54       141
To some extent       0.31      0.66      0.43        50
            No       0.43      0.44      0.43        57

      accuracy                           0.48       248
     macro avg       0.49      0.51      0.47       248
  weighted avg       0.57      0.48      0.49       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 14 Loss: 0.3067
Validation Accuracy: 0.4032  Validation Macro F1: 0.4011
                precision    recall  f1-score   support

           Yes       0.82      0.26      0.40       141
To some extent       0.33      0.46      0.39        50
            No       0.30      0.70      0.42        57

      accuracy                           0.40       248
     macro avg       0.48      0.47      0.40       248
  weighted avg       0.60      0.40      0.40       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 15 Loss: 0.2941
Validation Accuracy: 0.4476  Validation Macro F1: 0.4460
                precision    recall  f1-score   support

           Yes       0.80      0.32      0.46       141
To some extent       0.33      0.60      0.43        50
            No       0.36      0.63      0.46        57

      accuracy                           0.45       248
     macro avg       0.50      0.52      0.45       248
  weighted avg       0.61      0.45      0.45       248



In [17]:
# ---------- Run ----------
json_path = "/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json"
train_dataset, val_dataset, tokenizer, df = preprocess_dataset(json_path, "Actionability_label")
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)

In [18]:
for loss_name in ["ce", "smoothing", "focal"]:
    train_model(loss_name, train_loader, val_loader, num_labels=3)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔁 Training with: CE Loss



Epoch 1/15: 100%|██████████| 557/557 [02:05<00:00,  4.43it/s]


Epoch 1 Loss: 1.0829
Validation Accuracy: 0.6089  Validation Macro F1: 0.4092
                precision    recall  f1-score   support

           Yes       0.60      0.89      0.72       131
To some extent       0.00      0.00      0.00        37
            No       0.63      0.42      0.51        80

      accuracy                           0.61       248
     macro avg       0.41      0.44      0.41       248
  weighted avg       0.52      0.61      0.54       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 2 Loss: 0.9980
Validation Accuracy: 0.5968  Validation Macro F1: 0.4809
                precision    recall  f1-score   support

           Yes       0.62      0.76      0.69       131
To some extent       0.42      0.14      0.20        37
            No       0.57      0.54      0.55        80

      accuracy                           0.60       248
     macro avg       0.54      0.48      0.48       248
  weighted avg       0.57      0.60      0.57       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 3 Loss: 0.9627
Validation Accuracy: 0.6371  Validation Macro F1: 0.4770
                precision    recall  f1-score   support

           Yes       0.62      0.95      0.75       131
To some extent       0.36      0.11      0.17        37
            No       0.83      0.38      0.52        80

      accuracy                           0.64       248
     macro avg       0.60      0.48      0.48       248
  weighted avg       0.65      0.64      0.59       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 0.8980
Validation Accuracy: 0.6089  Validation Macro F1: 0.5595
                precision    recall  f1-score   support

           Yes       0.66      0.73      0.69       131
To some extent       0.38      0.51      0.44        37
            No       0.67      0.46      0.55        80

      accuracy                           0.61       248
     macro avg       0.57      0.57      0.56       248
  weighted avg       0.62      0.61      0.61       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.8377
Validation Accuracy: 0.5806  Validation Macro F1: 0.5318
                precision    recall  f1-score   support

           Yes       0.70      0.66      0.68       131
To some extent       0.31      0.49      0.38        37
            No       0.59      0.49      0.53        80

      accuracy                           0.58       248
     macro avg       0.53      0.55      0.53       248
  weighted avg       0.61      0.58      0.59       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 6 Loss: 0.7783
Validation Accuracy: 0.6250  Validation Macro F1: 0.5397
                precision    recall  f1-score   support

           Yes       0.65      0.84      0.73       131
To some extent       0.37      0.38      0.37        37
            No       0.76      0.39      0.51        80

      accuracy                           0.62       248
     macro avg       0.59      0.54      0.54       248
  weighted avg       0.64      0.62      0.61       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 7 Loss: 0.7535
Validation Accuracy: 0.5242  Validation Macro F1: 0.4835
                precision    recall  f1-score   support

           Yes       0.70      0.61      0.65       131
To some extent       0.27      0.65      0.38        37
            No       0.58      0.33      0.42        80

      accuracy                           0.52       248
     macro avg       0.52      0.53      0.48       248
  weighted avg       0.59      0.52      0.54       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 8 Loss: 0.7204
Validation Accuracy: 0.5524  Validation Macro F1: 0.4991
                precision    recall  f1-score   support

           Yes       0.67      0.64      0.66       131
To some extent       0.29      0.41      0.34        37
            No       0.53      0.47      0.50        80

      accuracy                           0.55       248
     macro avg       0.50      0.51      0.50       248
  weighted avg       0.57      0.55      0.56       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.46it/s]


Epoch 9 Loss: 0.6998
Validation Accuracy: 0.5484  Validation Macro F1: 0.4843
                precision    recall  f1-score   support

           Yes       0.69      0.60      0.64       131
To some extent       0.32      0.27      0.29        37
            No       0.46      0.59      0.52        80

      accuracy                           0.55       248
     macro avg       0.49      0.49      0.48       248
  weighted avg       0.56      0.55      0.55       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 10 Loss: 0.6750
Validation Accuracy: 0.5363  Validation Macro F1: 0.5045
                precision    recall  f1-score   support

           Yes       0.71      0.53      0.61       131
To some extent       0.34      0.46      0.39        37
            No       0.46      0.57      0.51        80

      accuracy                           0.54       248
     macro avg       0.50      0.52      0.50       248
  weighted avg       0.57      0.54      0.55       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.6709
Validation Accuracy: 0.5444  Validation Macro F1: 0.5175
                precision    recall  f1-score   support

           Yes       0.71      0.53      0.61       131
To some extent       0.35      0.51      0.41        37
            No       0.49      0.59      0.53        80

      accuracy                           0.54       248
     macro avg       0.52      0.54      0.52       248
  weighted avg       0.59      0.54      0.55       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.6618
Validation Accuracy: 0.5081  Validation Macro F1: 0.4761
                precision    recall  f1-score   support

           Yes       0.68      0.51      0.58       131
To some extent       0.27      0.46      0.34        37
            No       0.49      0.53      0.51        80

      accuracy                           0.51       248
     macro avg       0.48      0.50      0.48       248
  weighted avg       0.56      0.51      0.52       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 13 Loss: 0.6449
Validation Accuracy: 0.5847  Validation Macro F1: 0.5299
                precision    recall  f1-score   support

           Yes       0.69      0.69      0.69       131
To some extent       0.36      0.43      0.39        37
            No       0.53      0.49      0.51        80

      accuracy                           0.58       248
     macro avg       0.53      0.54      0.53       248
  weighted avg       0.59      0.58      0.59       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 14 Loss: 0.6571
Validation Accuracy: 0.5605  Validation Macro F1: 0.5208
                precision    recall  f1-score   support

           Yes       0.69      0.63      0.66       131
To some extent       0.35      0.51      0.41        37
            No       0.51      0.47      0.49        80

      accuracy                           0.56       248
     macro avg       0.52      0.54      0.52       248
  weighted avg       0.58      0.56      0.57       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 15 Loss: 0.6213
Validation Accuracy: 0.5685  Validation Macro F1: 0.5268
                precision    recall  f1-score   support

           Yes       0.67      0.65      0.66       131
To some extent       0.36      0.51      0.42        37
            No       0.54      0.46      0.50        80

      accuracy                           0.57       248
     macro avg       0.52      0.54      0.53       248
  weighted avg       0.58      0.57      0.57       248


🔁 Training with: SMOOTHING Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 1 Loss: 1.1518
Validation Accuracy: 0.5282  Validation Macro F1: 0.2304
                precision    recall  f1-score   support

           Yes       0.53      1.00      0.69       131
To some extent       0.00      0.00      0.00        37
            No       0.00      0.00      0.00        80

      accuracy                           0.53       248
     macro avg       0.18      0.33      0.23       248
  weighted avg       0.28      0.53      0.37       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 2 Loss: 1.1521
Validation Accuracy: 0.5282  Validation Macro F1: 0.2304
                precision    recall  f1-score   support

           Yes       0.53      1.00      0.69       131
To some extent       0.00      0.00      0.00        37
            No       0.00      0.00      0.00        80

      accuracy                           0.53       248
     macro avg       0.18      0.33      0.23       248
  weighted avg       0.28      0.53      0.37       248



Epoch 3/15: 100%|██████████| 557/557 [02:03<00:00,  4.49it/s]


Epoch 3 Loss: 1.1490
Validation Accuracy: 0.5282  Validation Macro F1: 0.2304
                precision    recall  f1-score   support

           Yes       0.53      1.00      0.69       131
To some extent       0.00      0.00      0.00        37
            No       0.00      0.00      0.00        80

      accuracy                           0.53       248
     macro avg       0.18      0.33      0.23       248
  weighted avg       0.28      0.53      0.37       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 1.1214
Validation Accuracy: 0.5968  Validation Macro F1: 0.3996
                precision    recall  f1-score   support

           Yes       0.60      0.88      0.71       131
To some extent       0.00      0.00      0.00        37
            No       0.60      0.41      0.49        80

      accuracy                           0.60       248
     macro avg       0.40      0.43      0.40       248
  weighted avg       0.51      0.60      0.53       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 5 Loss: 1.1064
Validation Accuracy: 0.6048  Validation Macro F1: 0.4484
                precision    recall  f1-score   support

           Yes       0.61      0.87      0.72       131
To some extent       0.38      0.08      0.13        37
            No       0.61      0.41      0.49        80

      accuracy                           0.60       248
     macro avg       0.53      0.45      0.45       248
  weighted avg       0.58      0.60      0.56       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 6 Loss: 1.1042
Validation Accuracy: 0.5040  Validation Macro F1: 0.3357
                precision    recall  f1-score   support

           Yes       0.55      0.73      0.63       131
To some extent       0.00      0.00      0.00        37
            No       0.40      0.36      0.38        80

      accuracy                           0.50       248
     macro avg       0.32      0.37      0.34       248
  weighted avg       0.42      0.50      0.45       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 7 Loss: 1.0975
Validation Accuracy: 0.6048  Validation Macro F1: 0.3938
                precision    recall  f1-score   support

           Yes       0.60      0.93      0.73       131
To some extent       0.00      0.00      0.00        37
            No       0.65      0.35      0.46        80

      accuracy                           0.60       248
     macro avg       0.42      0.43      0.39       248
  weighted avg       0.52      0.60      0.53       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 8 Loss: 1.0562
Validation Accuracy: 0.6089  Validation Macro F1: 0.4147
                precision    recall  f1-score   support

           Yes       0.59      0.97      0.73       131
To some extent       0.22      0.05      0.09        37
            No       0.92      0.28      0.42        80

      accuracy                           0.61       248
     macro avg       0.58      0.43      0.41       248
  weighted avg       0.64      0.61      0.54       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 9 Loss: 1.0327
Validation Accuracy: 0.6169  Validation Macro F1: 0.4610
                precision    recall  f1-score   support

           Yes       0.63      0.87      0.73       131
To some extent       0.33      0.08      0.13        37
            No       0.62      0.45      0.52        80

      accuracy                           0.62       248
     macro avg       0.53      0.47      0.46       248
  weighted avg       0.58      0.62      0.57       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 10 Loss: 1.0011
Validation Accuracy: 0.5282  Validation Macro F1: 0.5101
                precision    recall  f1-score   support

           Yes       0.66      0.50      0.57       131
To some extent       0.34      0.59      0.43        37
            No       0.52      0.54      0.53        80

      accuracy                           0.53       248
     macro avg       0.51      0.55      0.51       248
  weighted avg       0.57      0.53      0.54       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 11 Loss: 0.9765
Validation Accuracy: 0.5685  Validation Macro F1: 0.5317
                precision    recall  f1-score   support

           Yes       0.71      0.64      0.67       131
To some extent       0.28      0.62      0.38        37
            No       0.74      0.42      0.54        80

      accuracy                           0.57       248
     macro avg       0.57      0.56      0.53       248
  weighted avg       0.65      0.57      0.59       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 12 Loss: 0.9576
Validation Accuracy: 0.5484  Validation Macro F1: 0.5150
                precision    recall  f1-score   support

           Yes       0.68      0.57      0.62       131
To some extent       0.29      0.51      0.37        37
            No       0.58      0.53      0.55        80

      accuracy                           0.55       248
     macro avg       0.52      0.54      0.52       248
  weighted avg       0.59      0.55      0.56       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 13 Loss: 0.9409
Validation Accuracy: 0.5444  Validation Macro F1: 0.5194
                precision    recall  f1-score   support

           Yes       0.70      0.56      0.63       131
To some extent       0.28      0.65      0.39        37
            No       0.66      0.46      0.54        80

      accuracy                           0.54       248
     macro avg       0.55      0.56      0.52       248
  weighted avg       0.63      0.54      0.56       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 14 Loss: 0.9213
Validation Accuracy: 0.5806  Validation Macro F1: 0.5262
                precision    recall  f1-score   support

           Yes       0.68      0.69      0.68       131
To some extent       0.30      0.46      0.37        37
            No       0.62      0.46      0.53        80

      accuracy                           0.58       248
     macro avg       0.53      0.54      0.53       248
  weighted avg       0.60      0.58      0.59       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 15 Loss: 0.9110
Validation Accuracy: 0.5806  Validation Macro F1: 0.5286
                precision    recall  f1-score   support

           Yes       0.66      0.71      0.69       131
To some extent       0.32      0.57      0.41        37
            No       0.71      0.38      0.49        80

      accuracy                           0.58       248
     macro avg       0.57      0.55      0.53       248
  weighted avg       0.63      0.58      0.58       248


🔁 Training with: FOCAL Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 1 Loss: 0.5371
Validation Accuracy: 0.2661  Validation Macro F1: 0.2647
                precision    recall  f1-score   support

           Yes       0.00      0.00      0.00       131
To some extent       0.17      0.89      0.28        37
            No       0.69      0.41      0.52        80

      accuracy                           0.27       248
     macro avg       0.28      0.43      0.26       248
  weighted avg       0.25      0.27      0.21       248



Epoch 2/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 2 Loss: 0.4979
Validation Accuracy: 0.3105  Validation Macro F1: 0.3273
                precision    recall  f1-score   support

           Yes       0.83      0.18      0.30       131
To some extent       0.16      0.84      0.27        37
            No       0.81      0.28      0.41        80

      accuracy                           0.31       248
     macro avg       0.60      0.43      0.33       248
  weighted avg       0.72      0.31      0.33       248



Epoch 3/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 3 Loss: 0.4638
Validation Accuracy: 0.4597  Validation Macro F1: 0.4407
                precision    recall  f1-score   support

           Yes       0.66      0.49      0.56       131
To some extent       0.21      0.62      0.31        37
            No       0.68      0.34      0.45        80

      accuracy                           0.46       248
     macro avg       0.51      0.48      0.44       248
  weighted avg       0.60      0.46      0.49       248



Epoch 4/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 4 Loss: 0.4319
Validation Accuracy: 0.4073  Validation Macro F1: 0.4183
                precision    recall  f1-score   support

           Yes       0.71      0.35      0.47       131
To some extent       0.18      0.76      0.29        37
            No       0.90      0.34      0.49        80

      accuracy                           0.41       248
     macro avg       0.60      0.48      0.42       248
  weighted avg       0.69      0.41      0.45       248



Epoch 5/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 5 Loss: 0.3958
Validation Accuracy: 0.3589  Validation Macro F1: 0.3673
                precision    recall  f1-score   support

           Yes       0.70      0.20      0.31       131
To some extent       0.19      0.78      0.31        37
            No       0.56      0.42      0.48        80

      accuracy                           0.36       248
     macro avg       0.48      0.47      0.37       248
  weighted avg       0.58      0.36      0.37       248



Epoch 6/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 6 Loss: 0.3716
Validation Accuracy: 0.4073  Validation Macro F1: 0.4117
                precision    recall  f1-score   support

           Yes       0.76      0.29      0.42       131
To some extent       0.21      0.76      0.33        37
            No       0.56      0.44      0.49        80

      accuracy                           0.41       248
     macro avg       0.51      0.49      0.41       248
  weighted avg       0.61      0.41      0.43       248



Epoch 7/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 7 Loss: 0.3449
Validation Accuracy: 0.4758  Validation Macro F1: 0.4587
                precision    recall  f1-score   support

           Yes       0.70      0.47      0.56       131
To some extent       0.24      0.65      0.35        37
            No       0.56      0.40      0.47        80

      accuracy                           0.48       248
     macro avg       0.50      0.51      0.46       248
  weighted avg       0.58      0.48      0.50       248



Epoch 8/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 8 Loss: 0.3302
Validation Accuracy: 0.4274  Validation Macro F1: 0.4263
                precision    recall  f1-score   support

           Yes       0.77      0.31      0.45       131
To some extent       0.22      0.70      0.33        37
            No       0.51      0.49      0.50        80

      accuracy                           0.43       248
     macro avg       0.50      0.50      0.43       248
  weighted avg       0.61      0.43      0.45       248



Epoch 9/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 9 Loss: 0.3170
Validation Accuracy: 0.4113  Validation Macro F1: 0.4131
                precision    recall  f1-score   support

           Yes       0.77      0.28      0.41       131
To some extent       0.21      0.73      0.33        37
            No       0.52      0.47      0.50        80

      accuracy                           0.41       248
     macro avg       0.50      0.50      0.41       248
  weighted avg       0.61      0.41      0.43       248



Epoch 10/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 10 Loss: 0.3088
Validation Accuracy: 0.4516  Validation Macro F1: 0.4405
                precision    recall  f1-score   support

           Yes       0.74      0.43      0.54       131
To some extent       0.22      0.68      0.33        37
            No       0.53      0.39      0.45        80

      accuracy                           0.45       248
     macro avg       0.50      0.50      0.44       248
  weighted avg       0.59      0.45      0.48       248



Epoch 11/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 11 Loss: 0.3259
Validation Accuracy: 0.3992  Validation Macro F1: 0.3995
                precision    recall  f1-score   support

           Yes       0.73      0.25      0.37       131
To some extent       0.23      0.76      0.35        37
            No       0.46      0.47      0.47        80

      accuracy                           0.40       248
     macro avg       0.48      0.49      0.40       248
  weighted avg       0.57      0.40      0.40       248



Epoch 12/15: 100%|██████████| 557/557 [02:04<00:00,  4.48it/s]


Epoch 12 Loss: 0.3015
Validation Accuracy: 0.4718  Validation Macro F1: 0.4602
                precision    recall  f1-score   support

           Yes       0.70      0.47      0.56       131
To some extent       0.23      0.76      0.35        37
            No       0.70      0.35      0.47        80

      accuracy                           0.47       248
     macro avg       0.54      0.52      0.46       248
  weighted avg       0.63      0.47      0.50       248



Epoch 13/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 13 Loss: 0.2900
Validation Accuracy: 0.4395  Validation Macro F1: 0.4337
                precision    recall  f1-score   support

           Yes       0.81      0.29      0.43       131
To some extent       0.24      0.68      0.36        37
            No       0.47      0.57      0.52        80

      accuracy                           0.44       248
     macro avg       0.51      0.51      0.43       248
  weighted avg       0.61      0.44      0.45       248



Epoch 14/15: 100%|██████████| 557/557 [02:04<00:00,  4.49it/s]


Epoch 14 Loss: 0.2999
Validation Accuracy: 0.4556  Validation Macro F1: 0.4355
                precision    recall  f1-score   support

           Yes       0.65      0.49      0.56       131
To some extent       0.20      0.59      0.30        37
            No       0.66      0.34      0.45        80

      accuracy                           0.46       248
     macro avg       0.50      0.47      0.44       248
  weighted avg       0.59      0.46      0.48       248



Epoch 15/15: 100%|██████████| 557/557 [02:04<00:00,  4.47it/s]


Epoch 15 Loss: 0.3017
Validation Accuracy: 0.4315  Validation Macro F1: 0.4324
                precision    recall  f1-score   support

           Yes       0.84      0.28      0.42       131
To some extent       0.24      0.81      0.37        37
            No       0.51      0.50      0.51        80

      accuracy                           0.43       248
     macro avg       0.53      0.53      0.43       248
  weighted avg       0.65      0.43      0.44       248

