In [1]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import Dataset
import itertools




In [2]:
# Read the three tab-separated Yelp splits, keeping only the text and its style tag.
train, val, test = (
    pd.read_csv(path, sep='\t')[['Sentence', 'Style']]
    for path in ('yelp/train_en.txt', 'yelp/val_en.txt', 'yelp/test_en.txt')
)

In [3]:
# Draw fixed-size, reproducible subsamples (seed 42) so the grid search stays affordable.
train_small, val_small, test_small = (
    frame.sample(n=size, random_state=42)
    for frame, size in ((train, 5000), (val, 2000), (test, 2000))
)

In [4]:
# Encode the textual style tag as the integer class id expected by the classifier.
label_map = {'positive': 1, 'negative': 0}
for df in [train_small, val_small, test_small]:
    mapped = df['Style'].map(label_map)
    # Series.map produces NaN for any value that is not a key of label_map; a NaN
    # would silently turn the column into floats, so fail loudly right here instead.
    if mapped.isna().any():
        unknown = sorted(df.loc[mapped.isna(), 'Style'].astype(str).unique())
        raise ValueError(f"Unmapped 'Style' values: {unknown}")
    df['Label'] = mapped

In [5]:
class YelpDataset(Dataset):
    """Map-style dataset that tokenizes one sentence per item on demand.

    Args:
        sentences: Iterable of texts (list, tuple, pandas Series, ...).
        labels: Iterable of class ids, aligned position-by-position with
            ``sentences``.
        tokenizer: Callable invoked as ``tokenizer(text, padding='max_length',
            truncation=True, max_length=max_len, return_tensors='pt')`` and
            returning a mapping whose values support ``.squeeze(0)``.
        max_len: Length every encoded sentence is padded/truncated to.

    Raises:
        ValueError: If ``sentences`` and ``labels`` differ in length.
    """

    def __init__(self, sentences, labels, tokenizer, max_len=128):
        # Materialise both inputs as plain lists so that __getitem__ always
        # indexes by position. A pandas Series would otherwise be indexed by
        # label, which fails (or picks the wrong row) after sampling/shuffling.
        self.sentences = list(sentences)
        self.labels = list(labels)
        if len(self.sentences) != len(self.labels):
            raise ValueError(
                f"sentences and labels must have the same length, "
                f"got {len(self.sentences)} and {len(self.labels)}"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of (sentence, label) pairs."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return the encoded sentence at position ``idx`` plus its label tensor."""
        sentence = str(self.sentences[idx])
        encoding = self.tokenizer(sentence,
                                  padding='max_length',
                                  truncation=True,
                                  max_length=self.max_len,
                                  return_tensors='pt')
        # The tokenizer is asked for 'pt' tensors with a leading batch axis;
        # drop that axis so each item is a single example.
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [6]:
def train_evaluate_transformer(model_name, train_df, val_df, test_df, epochs=1, lr=5e-5):
    """Fine-tune a pretrained sequence classifier and score it on the test split.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model; also used to build the output directory name.
        train_df: DataFrame with 'Sentence' and 'Label' columns used for training.
        val_df: DataFrame with the same columns, evaluated after every epoch.
        test_df: DataFrame with the same columns, used for the final metrics.
        epochs: Number of training epochs.
        lr: Learning rate handed to ``TrainingArguments``.

    Returns:
        dict with 'accuracy', 'precision', 'recall' and 'f1' computed on ``test_df``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    def build_dataset(df):
        # All three splits are wrapped the same way.
        return YelpDataset(df['Sentence'].tolist(), df['Label'].tolist(), tokenizer)

    def score(labels, preds):
        # Single source of truth for the metrics, shared by the per-epoch
        # validation and the final test evaluation.
        return {
            'accuracy': accuracy_score(labels, preds),
            'precision': precision_score(labels, preds),
            'recall': recall_score(labels, preds),
            'f1': f1_score(labels, preds)
        }

    train_dataset = build_dataset(train_df)
    val_dataset = build_dataset(val_df)
    test_dataset = build_dataset(test_df)

    training_args = TrainingArguments(
        output_dir=f'{model_name}_results',
        eval_strategy="epoch",
        learning_rate=lr,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=epochs,
        weight_decay=0.01,
        save_strategy="no",
        logging_strategy="epoch",
        report_to="none"
    )

    def compute_metrics(p):
        # The predicted class is the index of the largest score along the last axis.
        return score(p.label_ids, p.predictions.argmax(-1))

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics
    )

    trainer.train()
    results = trainer.predict(test_dataset)
    # Reduce the scores to class ids once instead of once per metric.
    test_preds = results.predictions.argmax(-1)
    metrics = score(test_df['Label'], test_preds)
    print(f"\nResults for {model_name} (lr={lr}, epochs={epochs}): {metrics}")
    return metrics

In [7]:
# Hyper-parameter grid: every (model, learning rate, epoch count) combination is trained.
model_names = [
    "roberta-base",
    "distilbert-base-uncased",
]
lr_list = [
    5e-5,
    3e-5,
    1e-5,
]
epoch_list = [
    2,
    3,
]

In [8]:
# Exhaustive grid search: one fine-tuning run per combination, with the test
# metrics of each run stored next to the settings that produced them.
all_results = []
for model_name, lr, epochs in itertools.product(model_names, lr_list, epoch_list):
    res = train_evaluate_transformer(
        model_name,
        train_small,
        val_small,
        test_small,
        lr=lr,
        epochs=epochs,
    )
    all_results.append({'model': model_name, 'lr': lr, 'epochs': epochs, **res})

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4305,0.403898,0.86,0.907746,0.896384,0.902029
2,0.2909,0.456899,0.875,0.90027,0.929068,0.914442



Results for roberta-base (lr=5e-05, epochs=2): {'accuracy': 0.8925, 'precision': 0.9109176155391828, 'recall': 0.9431345353675451, 'f1': 0.9267461669505963}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4343,0.494787,0.862,0.932292,0.871349,0.900791
2,0.3097,0.422773,0.8755,0.912561,0.914465,0.913512
3,0.1914,0.507701,0.883,0.911202,0.927677,0.919366



Results for roberta-base (lr=5e-05, epochs=3): {'accuracy': 0.8935, 'precision': 0.9132481506388702, 'recall': 0.941747572815534, 'f1': 0.9272789347900308}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4107,0.349347,0.8825,0.894426,0.94854,0.920688
2,0.2698,0.431316,0.888,0.901455,0.947844,0.924068



Results for roberta-base (lr=3e-05, epochs=2): {'accuracy': 0.9005, 'precision': 0.9118621603711067, 'recall': 0.9542302357836339, 'f1': 0.9325652321247034}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4162,0.434467,0.8765,0.888454,0.947149,0.916863
2,0.2895,0.461599,0.878,0.905571,0.926982,0.916151
3,0.1796,0.544567,0.8785,0.910653,0.921419,0.916004



Results for roberta-base (lr=3e-05, epochs=3): {'accuracy': 0.9, 'precision': 0.9253424657534246, 'recall': 0.9368932038834952, 'f1': 0.9310820124052378}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3876,0.317304,0.8845,0.89011,0.95758,0.922613
2,0.2882,0.456354,0.892,0.903034,0.952017,0.926879



Results for roberta-base (lr=1e-05, epochs=2): {'accuracy': 0.897, 'precision': 0.9012987012987013, 'recall': 0.9625520110957004, 'f1': 0.9309188464118041}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.407,0.341688,0.892,0.90625,0.947844,0.926581
2,0.2959,0.442015,0.893,0.912955,0.94089,0.926712
3,0.2189,0.501524,0.8935,0.914692,0.939499,0.92693



Results for roberta-base (lr=1e-05, epochs=3): {'accuracy': 0.9055, 'precision': 0.9190635451505017, 'recall': 0.9528432732316228, 'f1': 0.9356486210418795}


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.413,0.393057,0.849,0.906295,0.881085,0.893512
2,0.251,0.490074,0.861,0.886667,0.924896,0.905378



Results for distilbert-base-uncased (lr=5e-05, epochs=2): {'accuracy': 0.8745, 'precision': 0.9004707464694015, 'recall': 0.9285714285714286, 'f1': 0.9143052236258109}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4126,0.352095,0.85,0.903546,0.885953,0.894663
2,0.2659,0.4916,0.858,0.902371,0.899861,0.901114
3,0.1421,0.589808,0.8585,0.893125,0.912378,0.902649



Results for distilbert-base-uncased (lr=5e-05, epochs=3): {'accuracy': 0.8695, 'precision': 0.8981793661496965, 'recall': 0.9237170596393898, 'f1': 0.9107692307692308}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3899,0.346671,0.8595,0.887995,0.920723,0.904063
2,0.253,0.464705,0.863,0.889037,0.924896,0.906612



Results for distilbert-base-uncased (lr=3e-05, epochs=2): {'accuracy': 0.875, 'precision': 0.8994638069705094, 'recall': 0.9306518723994452, 'f1': 0.9147920927062031}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3964,0.338364,0.861,0.895634,0.913074,0.90427
2,0.2552,0.440594,0.8655,0.90675,0.90612,0.906435
3,0.151,0.572945,0.8675,0.899796,0.917942,0.908778



Results for distilbert-base-uncased (lr=3e-05, epochs=3): {'accuracy': 0.874, 'precision': 0.9047619047619048, 'recall': 0.9223300970873787, 'f1': 0.9134615384615384}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3881,0.371343,0.8575,0.894053,0.909597,0.901758
2,0.2894,0.436086,0.8625,0.882818,0.932545,0.907



Results for distilbert-base-uncased (lr=1e-05, epochs=2): {'accuracy': 0.876, 'precision': 0.9006711409395973, 'recall': 0.9306518723994452, 'f1': 0.9154160982264665}


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4003,0.370751,0.856,0.900976,0.89847,0.899721
2,0.2918,0.399385,0.8645,0.898294,0.91516,0.906648
3,0.2315,0.474237,0.863,0.886968,0.927677,0.906866



Results for distilbert-base-uncased (lr=1e-05, epochs=3): {'accuracy': 0.8765, 'precision': 0.9007377598926894, 'recall': 0.9313453536754508, 'f1': 0.9157858847596317}


In [9]:
# One row per grid-search run: settings followed by the test metrics.
results_df = pd.DataFrame(data=all_results)
print(results_df)

                      model       lr  epochs  accuracy  precision    recall  \
0              roberta-base  0.00005       2    0.8925   0.910918  0.943135   
1              roberta-base  0.00005       3    0.8935   0.913248  0.941748   
2              roberta-base  0.00003       2    0.9005   0.911862  0.954230   
3              roberta-base  0.00003       3    0.9000   0.925342  0.936893   
4              roberta-base  0.00001       2    0.8970   0.901299  0.962552   
5              roberta-base  0.00001       3    0.9055   0.919064  0.952843   
6   distilbert-base-uncased  0.00005       2    0.8745   0.900471  0.928571   
7   distilbert-base-uncased  0.00005       3    0.8695   0.898179  0.923717   
8   distilbert-base-uncased  0.00003       2    0.8750   0.899464  0.930652   
9   distilbert-base-uncased  0.00003       3    0.8740   0.904762  0.922330   
10  distilbert-base-uncased  0.00001       2    0.8760   0.900671  0.930652   
11  distilbert-base-uncased  0.00001       3    0.87

In [22]:
# Changing the hyperparameters altered the model's performance only minimally.
# Compared with the models from the first laboratory exercise, the results are almost the same.