In [1]:
import os
# Display the kernel's working directory; the relative 'datasets/...' paths
# read later in this notebook are resolved against it.
os.getcwd()

'/home/zemota1/projects/hackathon-sword'

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
import numpy as np

In [3]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [4]:
# 1. Dataset definition
class ClaimDataset(Dataset):
    """Map-style dataset that tokenises one claim per item.

    Args:
        claims: Indexable sequence of claim texts; each item is passed
            through ``str()`` before tokenisation.
        labels: Indexable sequence of integer class ids aligned with
            ``claims``.
        tokenizer: Object exposing ``encode_plus`` (in this notebook a
            Hugging Face tokenizer).
        max_len: Token length every example is padded or truncated to.
    """

    def __init__(self, claims, labels, tokenizer, max_len):
        self.claims = claims
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of claims in the dataset."""
        return len(self.claims)

    def __getitem__(self, idx):
        """Tokenise the claim at ``idx``.

        Returns:
            dict with the raw ``claim_text`` (str), 1-D ``input_ids`` and
            ``attention_mask`` tensors, and ``labels`` as a ``torch.long``
            scalar tensor.
        """
        claim_text = str(self.claims[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            claim_text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            # Padding alone only lengthens short claims. Truncation must be
            # requested explicitly, otherwise a claim longer than max_len keeps
            # its full length and examples no longer share one size.
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'claim_text': claim_text,
            # return_tensors='pt' yields a leading batch axis of size 1;
            # flatten() drops it so each item is a 1-D tensor.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


In [5]:
# 2. Preprocessing
def preprocess(df, max_len, tokenizer, label_encoder=None):
    """Turn a claims DataFrame into a ``ClaimDataset``.

    Args:
        df: DataFrame with a ``claim`` column (text) and a ``label`` column.
        max_len: Token length forwarded to ``ClaimDataset``.
        tokenizer: Tokenizer forwarded to ``ClaimDataset``.
        label_encoder: Optional, already fitted ``LabelEncoder``. When given,
            labels are mapped with ``transform`` so that several splits share
            one label-to-id mapping (for example, fit once on the training
            labels and pass the same encoder for dev and test). When omitted,
            a fresh encoder is fitted on ``df`` alone; ids then agree across
            splits only if every split contains the same set of label values.

    Returns:
        ClaimDataset over the claims with integer-encoded labels.
    """
    claims = df['claim'].to_list()
    raw_labels = df['label'].to_list()

    if label_encoder is None:
        # Original behaviour: mapping derived from this split only.
        encoded_labels = LabelEncoder().fit_transform(raw_labels)
    else:
        # Reuse the caller's mapping; transform() raises on unseen labels
        # instead of silently assigning them shifted ids.
        encoded_labels = label_encoder.transform(raw_labels)

    return ClaimDataset(
        claims=claims,
        labels=encoded_labels,
        tokenizer=tokenizer,
        max_len=max_len
    )

In [6]:
# Read each tab-separated split and discard every row that has a missing value.
df_train = pd.read_csv('datasets/train.tsv', sep='\t').dropna()
df_val = pd.read_csv('datasets/dev.tsv', sep='\t').dropna()
df_test = pd.read_csv('datasets/test.tsv', sep='\t').dropna()

In [7]:
# Tokenizer for the same 'bert-base-uncased' checkpoint the model is loaded from.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Target token length handed to every ClaimDataset.
max_len = 128

# Wrap each split in a ClaimDataset, in train / dev / test order.
train_dataset, val_dataset, test_dataset = [
    preprocess(split_df, max_len, tokenizer)
    for split_df in (df_train, df_val, df_test)
]

In [9]:
# 3. Model and training
# Seed torch before the model is built: the classification head is not part of
# the pretrained checkpoint and is newly initialised, so without a seed its
# starting weights (and the final metrics) change from run to run.
SEED = 42
torch.manual_seed(SEED)

# Size of the classification head; must match the number of distinct values
# in the `label` column.
NUM_LABELS = 4

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=NUM_LABELS)
model = model.to(device)

# Training Arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=100,
    weight_decay=0.01,
    # Evaluate on eval_dataset periodically during training (every `eval_steps`
    # optimisation steps), not after every single step; the recorded run
    # evaluated at steps 500 and 1000.
    evaluation_strategy='steps',
    logging_dir='./logs',
    seed=SEED,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Step,Training Loss,Validation Loss
500,1.0349,0.929987
1000,0.8491,0.927053


TrainOutput(global_step=1479, training_loss=0.8221911158893784, metrics={'train_runtime': 400.5468, 'train_samples_per_second': 58.982, 'train_steps_per_second': 3.692, 'total_flos': 1554027576192000.0, 'train_loss': 0.8221911158893784, 'epoch': 3.0})

In [12]:
# 4. Evaluation
# Score the test split: trainer.predict returns (predictions, label_ids, metrics);
# the class with the highest score along axis 1 is taken as the prediction.
test_output = trainer.predict(test_dataset)
labels = test_output.label_ids
predictions = np.argmax(test_output.predictions, axis=1)

accuracy = accuracy_score(labels, predictions)
print(f'Test Accuracy: {accuracy}')

# Support-weighted averages over the classes; the fourth return value is unused.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true=labels,
    y_pred=predictions,
    average='weighted',
)
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 score: {f1}')

Test Accuracy: 0.5700099304865939
Precision: 0.5634056584688958
Recall: 0.5700099304865939
F1 score: 0.5634814330118255
