In [24]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json


In [25]:
import torch
import torch.nn as nn
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.metrics import accuracy_score, f1_score, classification_report
from torch.utils.data import DataLoader
from tqdm import tqdm
import pandas as pd
import json
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

In [26]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [27]:
class FocalLossWithWeights(nn.Module):
    def __init__(self, weight=None, gamma=2.0):
        super(FocalLossWithWeights, self).__init__()
        self.weight = weight
        self.gamma = gamma
        self.ce = nn.CrossEntropyLoss(reduction='none', weight=weight)

    def forward(self, inputs, targets):
        logp = self.ce(inputs, targets)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()


In [28]:
# ---------- Dataset Definition ----------
class TutorEvalSingleTaskDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        return {
            'input_ids': self.encodings['input_ids'][idx],
            'attention_mask': self.encodings['attention_mask'][idx],
            'labels': self.labels[idx]
        }

    def __len__(self):
        return len(self.labels)

In [29]:
# ---------- Preprocessing ----------
def load_and_flatten(json_path):
    with open(json_path) as f:
        data = json.load(f)

    rows = []
    for instance in data:
        convo_id = instance["conversation_id"]
        history = instance["conversation_history"]
        for tutor_id, tutor_data in instance["tutor_responses"].items():
            row = {
                "conversation_id": convo_id,
                "tutor_id": tutor_id,
                "conversation_history": history,
                "tutor_response": tutor_data["response"],
                "Mistake_Identification": tutor_data["annotation"]["Mistake_Identification"],
                "Mistake_Location": tutor_data["annotation"]["Mistake_Location"],
                "Pedagogical_Guidance": tutor_data["annotation"]["Providing_Guidance"],
                "Actionability": tutor_data["annotation"]["Actionability"]
            }
            rows.append(row)
    return pd.DataFrame(rows)

In [30]:
def build_input_text(row):
    return f"Context:\n{row['conversation_history']}\n\nTutor Response:\n{row['tutor_response']}"

LABEL_MAP = {"Yes": 0, "To some extent": 1, "No": 2}
MERGED_LABEL_MAP = {"Yes": 1, "To some extent": 1, "No": 0}

def encode_labels(df):
    for task in ["Mistake_Identification", "Mistake_Location", "Pedagogical_Guidance", "Actionability"]:
        df[f"{task}_label"] = df[task].map(LABEL_MAP)
        df[f"{task}_binary"] = df[task].map(MERGED_LABEL_MAP)
    return df

def tokenize_inputs(tokenizer, texts, max_length=256):
    return tokenizer(
        texts,
        add_special_tokens=True,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt"
    )

In [31]:
def preprocess_dataset(json_path, task_label):
    df = load_and_flatten(json_path)
    df["input_text"] = df.apply(build_input_text, axis=1)
    df = encode_labels(df)

    train_df, val_df = train_test_split(df, test_size=0.1, stratify=df[task_label], random_state=42)

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
    train_enc = tokenize_inputs(tokenizer, train_df["input_text"].tolist())
    val_enc = tokenize_inputs(tokenizer, val_df["input_text"].tolist())

    train_labels = torch.tensor(train_df[task_label].tolist())
    val_labels = torch.tensor(val_df[task_label].tolist())

    train_dataset = TutorEvalSingleTaskDataset(train_enc, train_labels)
    val_dataset = TutorEvalSingleTaskDataset(val_enc, val_labels)

    return train_dataset, val_dataset, tokenizer, df


In [32]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

def get_class_weights(labels, num_classes):
    class_weights = compute_class_weight(class_weight='balanced', classes=np.arange(num_classes), y=labels)
    return torch.tensor(class_weights, dtype=torch.float).to(device)


In [33]:
# ---------- Model ----------
class SingleTaskBertClassifier(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

    def forward(self, input_ids, attention_mask):
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits

In [34]:
# ---------- Training ----------
def evaluate_model(model, val_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].cpu().numpy()

            outputs = model(input_ids, attention_mask)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels)

    acc = accuracy_score(all_labels, all_preds)
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    print(f"Validation Accuracy: {acc:.4f}  Validation Macro F1: {macro_f1:.4f}")
    print(classification_report(all_labels, all_preds, target_names=["Yes", "To some extent", "No"], zero_division=0))

def train_model(loss_type, train_loader, val_loader, num_labels, epochs=15):
    print(f"\nüîÅ Training with: {loss_type.upper()} Loss\n")

    model = SingleTaskBertClassifier(num_labels).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

    train_labels_list = [label.item() for batch in train_loader for label in batch['labels']]
    class_weights = get_class_weights(train_labels_list, num_labels)

    if loss_type == "focal":
        criterion = FocalLossWithWeights(class_weights)
    elif loss_type == "smoothing":
        criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.1)
    else:
        criterion = nn.CrossEntropyLoss(weight=class_weights)


    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1} Loss: {avg_loss:.4f}")
        evaluate_model(model, val_loader)

        '''if(epoch==epochs):
            save_path = f"/kaggle/working/best_model_{loss_type}.pth"
            torch.save(model.state_dict(), save_path)
            print(f"‚úÖ Model weights saved to {save_path}")'''



In [35]:
# ---------- Run ----------
json_path = "/kaggle/input/json-data/assignment_3_ai_tutors_dataset.json"
train_dataset, val_dataset, tokenizer, df = preprocess_dataset(json_path, "Mistake_Identification_label")
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)



In [36]:
df

Unnamed: 0,conversation_id,tutor_id,conversation_history,tutor_response,Mistake_Identification,Mistake_Location,Pedagogical_Guidance,Actionability,input_text,Mistake_Identification_label,Mistake_Identification_binary,Mistake_Location_label,Mistake_Location_binary,Pedagogical_Guidance_label,Pedagogical_Guidance_binary,Actionability_label,Actionability_binary
0,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Sonnet,"Tutor: Hi, could you please provide a step-by-...","Great, you've correctly identified the cost of...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
1,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Llama318B,"Tutor: Hi, could you please provide a step-by-...",Now that we know the cost of 1 pound of meat i...,Yes,To some extent,To some extent,To some extent,"Context:\nTutor: Hi, could you please provide ...",0,1,1,1,1,1,1,1
2,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Llama31405B,"Tutor: Hi, could you please provide a step-by-...","You're close, but I notice that you calculated...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
3,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,GPT4,"Tutor: Hi, could you please provide a step-by-...","That's correct. So, if 1 pound of meat costs $...",Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
4,221-362eb11a-f190-42a6-b2a4-985fafdcfa9e,Mistral,"Tutor: Hi, could you please provide a step-by-...",It seems like you've calculated the cost as if...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2471,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Mistral,"Tutor: Hi, could you please provide a step-by-...",It seems there might be a misunderstanding in ...,Yes,Yes,To some extent,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,1,1,0,1
2472,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Phi3,"Tutor: Hi, could you please provide a step-by-...","To solve this problem, we need to add the numb...",No,No,No,No,"Context:\nTutor: Hi, could you please provide ...",2,0,2,0,2,0,2,0
2473,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Sonnet,"Tutor: Hi, could you please provide a step-by-...",That's a great start and I like how you worked...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1
2474,5910-25617a89-a4ae-47bb-8812-d6b39fa4e691,Expert,"Tutor: Hi, could you please provide a step-by-...",Okay. So Hector gave 5 less than four times as...,Yes,Yes,Yes,Yes,"Context:\nTutor: Hi, could you please provide ...",0,1,0,1,0,1,0,1


In [37]:
for loss_name in ["ce", "smoothing", "focal"]:
    train_model(loss_name, train_loader, val_loader, num_labels=3)


üîÅ Training with: CE Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:58<00:00,  4.70it/s]


Epoch 1 Loss: 1.0134
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 2/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 2 Loss: 0.9611
Validation Accuracy: 0.7702  Validation Macro F1: 0.3642
                precision    recall  f1-score   support

           Yes       0.79      0.97      0.87       194
To some extent       0.30      0.18      0.22        17
            No       0.00      0.00      0.00        37

      accuracy                           0.77       248
     macro avg       0.36      0.38      0.36       248
  weighted avg       0.64      0.77      0.70       248



Epoch 3/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 3 Loss: 0.9191
Validation Accuracy: 0.6855  Validation Macro F1: 0.4332
                precision    recall  f1-score   support

           Yes       0.83      0.80      0.81       194
To some extent       0.25      0.18      0.21        17
            No       0.24      0.32      0.28        37

      accuracy                           0.69       248
     macro avg       0.44      0.43      0.43       248
  weighted avg       0.70      0.69      0.69       248



Epoch 4/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 4 Loss: 0.8594
Validation Accuracy: 0.7097  Validation Macro F1: 0.4975
                precision    recall  f1-score   support

           Yes       0.85      0.80      0.83       194
To some extent       0.26      0.41      0.32        17
            No       0.34      0.35      0.35        37

      accuracy                           0.71       248
     macro avg       0.48      0.52      0.50       248
  weighted avg       0.74      0.71      0.72       248



Epoch 5/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 5 Loss: 0.8091
Validation Accuracy: 0.7298  Validation Macro F1: 0.4910
                precision    recall  f1-score   support

           Yes       0.85      0.85      0.85       194
To some extent       0.21      0.47      0.29        17
            No       0.56      0.24      0.34        37

      accuracy                           0.73       248
     macro avg       0.54      0.52      0.49       248
  weighted avg       0.76      0.73      0.73       248



Epoch 6/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 6 Loss: 0.7531
Validation Accuracy: 0.7984  Validation Macro F1: 0.5135
                precision    recall  f1-score   support

           Yes       0.83      0.95      0.89       194
To some extent       0.50      0.24      0.32        17
            No       0.53      0.24      0.33        37

      accuracy                           0.80       248
     macro avg       0.62      0.48      0.51       248
  weighted avg       0.76      0.80      0.77       248



Epoch 7/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 7 Loss: 0.7208
Validation Accuracy: 0.7056  Validation Macro F1: 0.4745
                precision    recall  f1-score   support

           Yes       0.85      0.81      0.83       194
To some extent       0.18      0.35      0.24        17
            No       0.39      0.32      0.35        37

      accuracy                           0.71       248
     macro avg       0.47      0.50      0.47       248
  weighted avg       0.74      0.71      0.72       248



Epoch 8/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 8 Loss: 0.7016
Validation Accuracy: 0.7621  Validation Macro F1: 0.5018
                precision    recall  f1-score   support

           Yes       0.84      0.89      0.87       194
To some extent       0.20      0.29      0.24        17
            No       0.61      0.30      0.40        37

      accuracy                           0.76       248
     macro avg       0.55      0.49      0.50       248
  weighted avg       0.77      0.76      0.75       248



Epoch 9/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 9 Loss: 0.6931
Validation Accuracy: 0.6089  Validation Macro F1: 0.4357
                precision    recall  f1-score   support

           Yes       0.83      0.66      0.74       194
To some extent       0.22      0.29      0.25        17
            No       0.24      0.46      0.32        37

      accuracy                           0.61       248
     macro avg       0.43      0.47      0.44       248
  weighted avg       0.70      0.61      0.64       248



Epoch 10/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 10 Loss: 0.7067
Validation Accuracy: 0.7379  Validation Macro F1: 0.4628
                precision    recall  f1-score   support

           Yes       0.84      0.87      0.85       194
To some extent       0.25      0.18      0.21        17
            No       0.33      0.32      0.33        37

      accuracy                           0.74       248
     macro avg       0.47      0.46      0.46       248
  weighted avg       0.72      0.74      0.73       248



Epoch 11/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 11 Loss: 0.6397
Validation Accuracy: 0.7863  Validation Macro F1: 0.4768
                precision    recall  f1-score   support

           Yes       0.83      0.94      0.88       194
To some extent       0.21      0.18      0.19        17
            No       0.64      0.24      0.35        37

      accuracy                           0.79       248
     macro avg       0.56      0.45      0.48       248
  weighted avg       0.76      0.79      0.76       248



Epoch 12/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 12 Loss: 0.6249
Validation Accuracy: 0.6976  Validation Macro F1: 0.4543
                precision    recall  f1-score   support

           Yes       0.84      0.80      0.82       194
To some extent       0.16      0.24      0.19        17
            No       0.35      0.35      0.35        37

      accuracy                           0.70       248
     macro avg       0.45      0.46      0.45       248
  weighted avg       0.72      0.70      0.71       248



Epoch 13/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 13 Loss: 0.6325
Validation Accuracy: 0.6331  Validation Macro F1: 0.4550
                precision    recall  f1-score   support

           Yes       0.87      0.70      0.77       194
To some extent       0.12      0.41      0.18        17
            No       0.44      0.38      0.41        37

      accuracy                           0.63       248
     macro avg       0.47      0.50      0.45       248
  weighted avg       0.75      0.63      0.68       248



Epoch 14/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 14 Loss: 0.6273
Validation Accuracy: 0.7419  Validation Macro F1: 0.4865
                precision    recall  f1-score   support

           Yes       0.84      0.87      0.85       194
To some extent       0.13      0.24      0.17        17
            No       0.67      0.32      0.44        37

      accuracy                           0.74       248
     macro avg       0.55      0.48      0.49       248
  weighted avg       0.77      0.74      0.74       248



Epoch 15/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 15 Loss: 0.6214
Validation Accuracy: 0.7177  Validation Macro F1: 0.4635
                precision    recall  f1-score   support

           Yes       0.84      0.84      0.84       194
To some extent       0.11      0.24      0.15        17
            No       0.61      0.30      0.40        37

      accuracy                           0.72       248
     macro avg       0.52      0.46      0.46       248
  weighted avg       0.76      0.72      0.73       248


üîÅ Training with: SMOOTHING Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.72it/s]


Epoch 1 Loss: 1.3602
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 2/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.72it/s]


Epoch 2 Loss: 1.3620
Validation Accuracy: 0.4960  Validation Macro F1: 0.2980
                precision    recall  f1-score   support

           Yes       0.79      0.53      0.64       194
To some extent       0.00      0.00      0.00        17
            No       0.17      0.54      0.26        37

      accuracy                           0.50       248
     macro avg       0.32      0.36      0.30       248
  weighted avg       0.65      0.50      0.54       248



Epoch 3/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 3 Loss: 1.3559
Validation Accuracy: 0.7218  Validation Macro F1: 0.3136
                precision    recall  f1-score   support

           Yes       0.79      0.91      0.84       194
To some extent       0.00      0.00      0.00        17
            No       0.12      0.08      0.10        37

      accuracy                           0.72       248
     macro avg       0.30      0.33      0.31       248
  weighted avg       0.64      0.72      0.67       248



Epoch 4/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 4 Loss: 1.3650
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 5/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.75it/s]


Epoch 5 Loss: 1.3657
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 6/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.75it/s]


Epoch 6 Loss: 1.3591
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 7/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 7 Loss: 1.3635
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 8/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.75it/s]


Epoch 8 Loss: 1.3567
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 9/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.75it/s]


Epoch 9 Loss: 1.3641
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 10/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.75it/s]


Epoch 10 Loss: 1.3601
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 11/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 11 Loss: 1.3624
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 12/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 12 Loss: 1.3588
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 13/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 13 Loss: 1.3598
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 14/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 14 Loss: 1.3624
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248



Epoch 15/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.74it/s]


Epoch 15 Loss: 1.3571
Validation Accuracy: 0.7823  Validation Macro F1: 0.2926
                precision    recall  f1-score   support

           Yes       0.78      1.00      0.88       194
To some extent       0.00      0.00      0.00        17
            No       0.00      0.00      0.00        37

      accuracy                           0.78       248
     macro avg       0.26      0.33      0.29       248
  weighted avg       0.61      0.78      0.69       248


üîÅ Training with: FOCAL Loss



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 1 Loss: 0.6758
Validation Accuracy: 0.0806  Validation Macro F1: 0.0922
                precision    recall  f1-score   support

           Yes       0.00      0.00      0.00       194
To some extent       0.07      1.00      0.13        17
            No       0.75      0.08      0.15        37

      accuracy                           0.08       248
     macro avg       0.27      0.36      0.09       248
  weighted avg       0.12      0.08      0.03       248



Epoch 2/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 2 Loss: 0.6356
Validation Accuracy: 0.3266  Validation Macro F1: 0.3244
                precision    recall  f1-score   support

           Yes       0.97      0.20      0.33       194
To some extent       0.26      0.41      0.32        17
            No       0.20      0.97      0.33        37

      accuracy                           0.33       248
     macro avg       0.48      0.53      0.32       248
  weighted avg       0.81      0.33      0.33       248



Epoch 3/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 3 Loss: 0.6025
Validation Accuracy: 0.0806  Validation Macro F1: 0.0922
                precision    recall  f1-score   support

           Yes       0.00      0.00      0.00       194
To some extent       0.07      1.00      0.13        17
            No       0.75      0.08      0.15        37

      accuracy                           0.08       248
     macro avg       0.27      0.36      0.09       248
  weighted avg       0.12      0.08      0.03       248



Epoch 4/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 4 Loss: 0.5580
Validation Accuracy: 0.1815  Validation Macro F1: 0.1840
                precision    recall  f1-score   support

           Yes       0.93      0.07      0.13       194
To some extent       0.07      0.47      0.12        17
            No       0.19      0.62      0.30        37

      accuracy                           0.18       248
     macro avg       0.40      0.39      0.18       248
  weighted avg       0.76      0.18      0.16       248



Epoch 5/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 5 Loss: 0.5001
Validation Accuracy: 0.3266  Validation Macro F1: 0.3022
                precision    recall  f1-score   support

           Yes       1.00      0.23      0.38       194
To some extent       0.14      0.53      0.22        17
            No       0.19      0.73      0.31        37

      accuracy                           0.33       248
     macro avg       0.45      0.50      0.30       248
  weighted avg       0.82      0.33      0.36       248



Epoch 6/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 6 Loss: 0.4332
Validation Accuracy: 0.3266  Validation Macro F1: 0.2895
                precision    recall  f1-score   support

           Yes       1.00      0.25      0.40       194
To some extent       0.11      0.41      0.18        17
            No       0.18      0.68      0.29        37

      accuracy                           0.33       248
     macro avg       0.43      0.45      0.29       248
  weighted avg       0.82      0.33      0.37       248



Epoch 7/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 7 Loss: 0.4190
Validation Accuracy: 0.3387  Validation Macro F1: 0.3030
                precision    recall  f1-score   support

           Yes       1.00      0.27      0.42       194
To some extent       0.11      0.53      0.19        17
            No       0.20      0.62      0.30        37

      accuracy                           0.34       248
     macro avg       0.44      0.47      0.30       248
  weighted avg       0.82      0.34      0.39       248



Epoch 8/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 8 Loss: 0.3896
Validation Accuracy: 0.3427  Validation Macro F1: 0.3057
                precision    recall  f1-score   support

           Yes       1.00      0.28      0.44       194
To some extent       0.10      0.53      0.17        17
            No       0.22      0.59      0.32        37

      accuracy                           0.34       248
     macro avg       0.44      0.47      0.31       248
  weighted avg       0.82      0.34      0.40       248



Epoch 9/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 9 Loss: 0.3579
Validation Accuracy: 0.3710  Validation Macro F1: 0.3092
                precision    recall  f1-score   support

           Yes       0.92      0.34      0.50       194
To some extent       0.09      0.41      0.14        17
            No       0.20      0.51      0.29        37

      accuracy                           0.37       248
     macro avg       0.40      0.42      0.31       248
  weighted avg       0.75      0.37      0.44       248



Epoch 10/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 10 Loss: 0.3473
Validation Accuracy: 0.3548  Validation Macro F1: 0.3141
                precision    recall  f1-score   support

           Yes       0.98      0.29      0.45       194
To some extent       0.11      0.53      0.19        17
            No       0.21      0.62      0.31        37

      accuracy                           0.35       248
     macro avg       0.43      0.48      0.31       248
  weighted avg       0.81      0.35      0.41       248



Epoch 11/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 11 Loss: 0.3445
Validation Accuracy: 0.3669  Validation Macro F1: 0.3298
                precision    recall  f1-score   support

           Yes       0.98      0.28      0.44       194
To some extent       0.14      0.53      0.22        17
            No       0.21      0.73      0.33        37

      accuracy                           0.37       248
     macro avg       0.44      0.51      0.33       248
  weighted avg       0.81      0.37      0.41       248



Epoch 12/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 12 Loss: 0.3473
Validation Accuracy: 0.3105  Validation Macro F1: 0.2786
                precision    recall  f1-score   support

           Yes       0.98      0.26      0.41       194
To some extent       0.08      0.53      0.15        17
            No       0.20      0.49      0.28        37

      accuracy                           0.31       248
     macro avg       0.42      0.42      0.28       248
  weighted avg       0.80      0.31      0.37       248



Epoch 13/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 13 Loss: 0.3344
Validation Accuracy: 0.3468  Validation Macro F1: 0.3035
                precision    recall  f1-score   support

           Yes       0.95      0.29      0.44       194
To some extent       0.10      0.47      0.16        17
            No       0.21      0.59      0.31        37

      accuracy                           0.35       248
     macro avg       0.42      0.45      0.30       248
  weighted avg       0.78      0.35      0.40       248



Epoch 14/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 14 Loss: 0.3509
Validation Accuracy: 0.3589  Validation Macro F1: 0.3080
                precision    recall  f1-score   support

           Yes       0.92      0.31      0.47       194
To some extent       0.10      0.47      0.16        17
            No       0.20      0.54      0.30        37

      accuracy                           0.36       248
     macro avg       0.41      0.44      0.31       248
  weighted avg       0.76      0.36      0.42       248



Epoch 15/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 557/557 [01:57<00:00,  4.73it/s]


Epoch 15 Loss: 0.3292
Validation Accuracy: 0.3710  Validation Macro F1: 0.3140
                precision    recall  f1-score   support

           Yes       0.93      0.32      0.48       194
To some extent       0.10      0.41      0.16        17
            No       0.21      0.59      0.31        37

      accuracy                           0.37       248
     macro avg       0.41      0.44      0.31       248
  weighted avg       0.76      0.37      0.43       248

