In [11]:
from datasets import DatasetDict, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, \
    DataCollatorWithPadding, EarlyStoppingCallback, set_seed
from pathlib import Path
import numpy as np
import evaluate
import torch
from pynvml import *
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import classification_report
import random
import argparse
import sys
import shutil
import pandas as pd

def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used // 1024 ** 2} MB.")


def print_summary(result):
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()


def tokenize_function(examples):
    return tokenizer(examples['sentence'], truncation=True, max_length=128)


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    scmetrics.add_batch(predictions=predictions, references=labels)
    return scmetrics.compute()


def create_labels(sentiment):
    labels = []
    for s in sentiment:
        if s == 'neutral':
            labels += [0]
        elif s == 'negative':
            labels += [1]
        else:
            labels += [2]
    return labels

In [7]:
MODEL =  "UFNLP/gatortron-base"

# set seed
seed = 42
random.seed(42)
np.random.seed(42)
set_seed(seed)
agreement = "60"

train = pd.read_csv(f'~/azure_gpt/data/test_{agreement}.csv', header=0)
train = train[["language", "MD_label"]]
train = Dataset.from_pandas(train).rename_columns({'language': 'sentence', "MD_label": 'sentiment'})
train = train.add_column('label', create_labels(train['sentiment']))

valid = pd.read_csv(f'~/azure_gpt/data/train_{agreement}.csv', header=0)
valid = valid[["language", "MD_label"]]
valid = Dataset.from_pandas(valid).rename_columns({'language': 'sentence', "MD_label": 'sentiment'})
valid = valid.add_column('label', create_labels(valid['sentiment']))

test = pd.read_csv(f'~/azure_gpt/data/validation_sentences.csv', header=0)
test = test[["language", "MD_label"]]
test = Dataset.from_pandas(test).rename_columns({'language': 'sentence', "MD_label": 'sentiment'})
test = test.add_column('label', create_labels(test['sentiment']))

label_dt = DatasetDict({
    'train': train,
    'dev': valid,
    'test': test})

print(label_dt)

DatasetDict({
    train: Dataset({
        features: ['sentence', 'sentiment', 'label'],
        num_rows: 23
    })
    dev: Dataset({
        features: ['sentence', 'sentiment', 'label'],
        num_rows: 10
    })
    test: Dataset({
        features: ['sentence', 'sentiment', 'label'],
        num_rows: 15
    })
})


In [8]:
tokenizer = AutoTokenizer.from_pretrained("UFNLP/gatortron-base")
tkn_dt = label_dt.map(tokenize_function, batched=True, num_proc=4)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map (num_proc=4):   0%|          | 0/23 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/10 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/15 [00:00<?, ? examples/s]

In [9]:
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL, num_labels=3)
if torch.cuda.is_available():
    model.to('cuda') # put the model on the gpu once, and then add a batch everytime when doing a training or evaluation loop
    print_gpu_utilization()

Some weights of the model checkpoint at UFNLP/gatortron-base were not used when initializing MegatronBertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MegatronBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MegatronBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MegatronBertForSequenceClassification were not initi

In [10]:
# Hyperparameters (for best configuration selection)
# 1st evaluate the hyper parameters once at a time, and select each hyper parameter that gives you the best score on results aka F1 vs recall
params = {
    'batch_size': [2, 4, 6],
    'epochs': [1, 2, 4],
    'learning_rate': [5e-6, 1e-5, 2e-5, 5e-5, 1e-4],
    'weight_decay': [0, 0.01, 0.1], # how much the weight change is shrinking
    'warmup_ratio': [0, 0.01, 0.1] # ratio of examples it takes to get ready for the learning rate
}

metrics_file = f'classification_metrics_GatorTron_MD_{agreement}.csv'
if os.path.isfile(metrics_file):
    f = open(metrics_file, 'a')
else:
    f = open(metrics_file, 'w')
    f.write('batch_size,epochs,learning_rate,weight_decay,warmup_ratio,loss,f1,precision,recall\n')

best_model = []
best_f1 = 0.0
tmp_trainer, tmp_comb = None, None
for comb in list(ParameterGrid(params)):
    print(f"Parameters: {comb}")
    training_args = TrainingArguments(
        output_dir=f'runs/MD',
        evaluation_strategy='epoch', # every epoch the model is evaluated and checkpoint is made saving the weights
        eval_steps=1, # check, probably each step is by epoch
        logging_strategy='epoch',
        weight_decay=comb['weight_decay'],
        warmup_ratio=comb['warmup_ratio'],
        num_train_epochs=comb['epochs'],
        learning_rate=comb['learning_rate'],
        per_device_train_batch_size=comb['batch_size'],
        per_device_eval_batch_size=comb['batch_size'],
        save_strategy='epoch',
        load_best_model_at_end=True,
        metric_for_best_model='eval_f1',
        seed=seed,
        data_seed=seed)
    scmetrics = evaluate.load("../scmetrics")

    trainer = Trainer(model=model,
                      args=training_args,
                      callbacks=[EarlyStoppingCallback(early_stopping_patience=3)], # if loss isnt decreasing for 2 epochs then it stops training
                      train_dataset=tkn_dt['train'],
                      eval_dataset=tkn_dt['dev'],
                      compute_metrics=compute_metrics,
                      data_collator=data_collator)
    results = trainer.train()
    results_eval = trainer.evaluate()

    v = [comb['batch_size'], comb['epochs'], comb['learning_rate'], comb['weight_decay'], comb['warmup_ratio'],
    results.metrics['train_loss'], results_eval['eval_f1'], results_eval['eval_precision'], results_eval['eval_recall']]
    f.write(','.join([str(el) for el in v]) + '\n')

    if results_eval['eval_f1'] > best_f1:
        best_f1 = results_eval['eval_f1']
        tmp_trainer = trainer
        tmp_comb = comb
    print('-' * 100)
    print('\n\n')

# Error analysis step
labels_to_sen = {0: 'neutral', 1: 'negative', 2: 'positive'}
if tmp_trainer is not None:
    best_trainer = tmp_trainer
    best_comb = tmp_comb
    print(f'Best parameters configuration: {best_comb}')
    dev_pred = best_trainer.predict(tkn_dt['test'])
    pred = np.argmax(dev_pred.predictions, axis=-1)
    pred_score = np.max(torch.nn.functional.softmax(torch.tensor(dev_pred.predictions), dim=-1).numpy(), axis=-1)
    i = 0
    errors = {'FP': [], 'FN': []}
    for pred_lab, true_lab in zip(pred, dev_pred.label_ids):
        if pred_lab != true_lab:
            if pred_lab > 1:
                errors['FP'].append((
                    tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(tkn_dt['test']['input_ids'][i])),
                    pred_score[i], labels_to_sen[pred_lab], labels_to_sen[true_lab]))
            else:
                errors['FN'].append((tokenizer.convert_tokens_to_string(
                    tokenizer.convert_ids_to_tokens(tkn_dt['test']['input_ids'][i])), pred_score[i],
                                     labels_to_sen[pred_lab], labels_to_sen[true_lab]))
        i += 1
    with open(f'error_analysis_GatorTron_MD_{agreement}.tsv',
              'w') as f:
        f.write('sentence\tpredicted_label\ttrue_label\tprobability\n')
        for k, vect in errors.items():
            if k == 'FP':
                for sen in vect:
                    f.write(sen[0] + '\t' + f'PRED_{sen[2].upper()}' + '\t' + f'TRUE_{sen[3].upper()}' + '\t' + str(
                        sen[1]) + '\n')
                f.write('\n')
            else:
                for sen in vect:
                    f.write(sen[0] + '\t' + f'PRED_{sen[2].upper()}' + '\t' + f'TRUE_{sen[3].upper()}' + '\t' + str(
                        sen[1]) + '\n')
    test_pred = best_trainer.predict(tkn_dt['test'])
    print(test_pred.metrics)

    model_dir = f'runs/GatorTron/MD'
    for d in os.listdir(model_dir):
        # This removes the checkpoints (comment it if you want to keep them)
        if 'checkpoint' in d:
            shutil.rmtree(os.path.join(model_dir, d))
    best_trainer.save_model(
        output_dir=f'best_model/GatorTron/MD/GatorTron_{agreement}')
else:
    print("Precision is 0.0 change something in your model's configuration and retry.")
f.close()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-06, 'warmup_ratio': 0, 'weight_decay': 0}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.20      0.25      0.22         4
           1       0.40      0.50      0.44         4
           2       0.00      0.00      0.00         2

    accuracy                           0.30        10
   macro avg       0.20      0.25      0.22        10
weighted avg       0.24      0.30      0.27        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-06, 'warmup_ratio': 0, 'weight_decay': 0.1}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-06, 'warmup_ratio': 0.1, 'weight_decay': 0}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-06, 'warmup_ratio': 0.1, 'weight_decay': 0.1}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 1e-05, 'warmup_ratio': 0, 'weight_decay': 0}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.50      0.50      0.50         4
           2       0.00      0.00      0.00         2

    accuracy                           0.40        10
   macro avg       0.28      0.33      0.30        10
weighted avg       0.33      0.40      0.36        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.40      0.50      0.44         4
           1       0.60      0.75      0.67         4
           2       0.00      0.00      0.00         2

    accuracy                           0.50        10
   macro avg       0.33      0.42      0.37        10
weighted avg       0.40      0.50      0.44        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.40      0.50      0.44         4
           1       0.60      0.75      0.67         4
           2       0.00      0.00      0.00         2

    accuracy                           0.50        10
   macro avg       0.33      0.42      0.37        10
weighted avg       0.40      0.50      0.44        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 1e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}


Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.50      1.00      0.67         4
           1       1.00      0.50      0.67         4
           2       0.00      0.00      0.00         2

    accuracy                           0.60        10
   macro avg       0.50      0.50      0.44        10
weighted avg       0.60      0.60      0.53        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       0.67      0.50      0.57         4
           2       1.00      0.50      0.67         2

    accuracy                           0.60        10
   macro avg       0.72      0.58      0.61        10
weighted avg       0.67      0.60      0.60        10



              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       0.67      0.50      0.57         4
           2       1.00      0.50      0.67         2

    accuracy                           0.60        10
   macro avg       0.72      0.58      0.61        10
weighted avg       0.67      0.60      0.60        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 1e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.60      0.75      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.60        10
   macro avg       0.70      0.58      0.61        10
weighted avg       0.64      0.60      0.60        10



              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 1e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.60      0.75      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.60        10
   macro avg       0.70      0.58      0.61        10
weighted avg       0.64      0.60      0.60        10

              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10



              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 2e-05, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10



              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.75      0.75      0.75         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.78      0.67      0.69        10
weighted avg       0.74      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 2e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10

              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10



              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10

              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10



              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      0.50      0.67         2

    accuracy                           0.70        10
   macro avg       0.86      0.67      0.69        10
weighted avg       0.83      0.70      0.69        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.75      0.75      0.75         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.58      0.58      0.58        10
weighted avg       0.60      0.60      0.60        10

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.75      0.75      0.75         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.58      0.58      0.58        10
weighted avg       0.60      0.60      0.60        10



              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.75      0.75      0.75         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.58      0.58      0.58        10
weighted avg       0.60      0.60      0.60        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-05, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.75      0.86         4
           2       1.00      0.50      0.67         2

    accuracy                           0.80        10
   macro avg       0.89      0.75      0.77        10
weighted avg       0.87      0.80      0.80        10

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.75      0.75      0.75         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.58      0.58      0.58        10
weighted avg       0.60      0.60      0.60        10



              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.75      0.86         4
           2       1.00      0.50      0.67         2

    accuracy                           0.80        10
   macro avg       0.89      0.75      0.77        10
weighted avg       0.87      0.80      0.80        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       1.00      0.50      0.67         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.67      0.58      0.59        10
weighted avg       0.70      0.60      0.61        10

              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       1.00      0.50      0.67         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.67      0.58      0.59        10
weighted avg       0.70      0.60      0.61        10



              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       1.00      0.50      0.67         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.67      0.58      0.59        10
weighted avg       0.70      0.60      0.61        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.75      0.86         4
           2       0.00      0.00      0.00         2

    accuracy                           0.70        10
   macro avg       0.52      0.58      0.53        10
weighted avg       0.63      0.70      0.63        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       1.00      0.75      0.86         4
           2       0.50      0.50      0.50         2

    accuracy                           0.70        10
   macro avg       0.70      0.67      0.67        10
weighted avg       0.74      0.70      0.71        10



              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       1.00      0.75      0.86         4
           2       0.50      0.50      0.50         2

    accuracy                           0.70        10
   macro avg       0.70      0.67      0.67        10
weighted avg       0.74      0.70      0.71        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       1.00      0.50      0.67         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.67      0.58      0.59        10
weighted avg       0.70      0.60      0.61        10

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         4
           1       1.00      0.50      0.67         4
           2       0.00      0.00      0.00         2

    accuracy                           0.60        10
   macro avg       0.50      0.50      0.44        10
weighted avg       0.60      0.60      0.53        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.50      0.75      0.60         4
           1       1.00      0.50      0.67         4
           2       0.50      0.50      0.50         2

    accuracy                           0.60        10
   macro avg       0.67      0.58      0.59        10
weighted avg       0.70      0.60      0.61        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 0.0001, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.60      0.75      0.67         4
           2       0.40      1.00      0.57         2

    accuracy                           0.50        10
   macro avg       0.33      0.58      0.41        10
weighted avg       0.32      0.50      0.38        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.50      0.67         4
           2       1.00      1.00      1.00         2

    accuracy                           0.80        10
   macro avg       0.89      0.83      0.82        10
weighted avg       0.87      0.80      0.79        10



              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.50      0.67         4
           2       1.00      1.00      1.00         2

    accuracy                           0.80        10
   macro avg       0.89      0.83      0.82        10
weighted avg       0.87      0.80      0.79        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 0.0001, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.60      0.75      0.67         4
           2       0.50      1.00      0.67         2

    accuracy                           0.50        10
   macro avg       0.37      0.58      0.44        10
weighted avg       0.34      0.50      0.40        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10



              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 0.0001, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.60      0.75      0.67         4
           2       0.50      1.00      0.67         2

    accuracy                           0.50        10
   macro avg       0.37      0.58      0.44        10
weighted avg       0.34      0.50      0.40        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10



              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 2, 'learning_rate': 0.0001, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10



              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-06, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-06, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-06, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-06, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 1e-05, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 1e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 1e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 1e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 2e-05, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 2e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-05, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-05, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 0.0001, 'warmup_ratio': 0, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 0.0001, 'warmup_ratio': 0, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 0.0001, 'warmup_ratio': 0.1, 'weight_decay': 0}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Parameters: {'batch_size': 4, 'epochs': 4, 'learning_rate': 0.0001, 'warmup_ratio': 0.1, 'weight_decay': 0.1}




Epoch,Training Loss,Validation Loss


              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy        

              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       1.00      0.75      0.86         4
           2       0.50      1.00      0.67         2

    accuracy                           0.70        10
   macro avg       0.72      0.75      0.70        10
weighted avg       0.77      0.70      0.70        10

----------------------------------------------------------------------------------------------------



Best parameters configuration: {'batch_size': 4, 'epochs': 2, 'learning_rate': 5e-05, 'warmup_ratio': 0, 'weight_decay': 0}
              precision    recall  f1-score   support

           0       0.67      0.67      0.67         3
           1       0.88      0.70      0.78        10
           2       0.50      1.00      0.67         2

    accuracy                           0.73        15
   macro avg       0.68      0.79      0.70        15
weighted avg       0.78      0.73      0.74        15



              precision    recall  f1-score   support

           0       0.67      0.67      0.67         3
           1       0.88      0.70      0.78        10
           2       0.50      1.00      0.67         2

    accuracy                           0.73        15
   macro avg       0.68      0.79      0.70        15
weighted avg       0.78      0.73      0.74        15

{'test_loss': 1.163994312286377, 'test_f1': 0.7407407407407407, 'test_precision': 0.7833333333333333, 'test_recall': 0.7333333333333333, 'test_runtime': 7.6853, 'test_samples_per_second': 1.952, 'test_steps_per_second': 0.52}


FileNotFoundError: [Errno 2] No such file or directory: 'runs/GatorTron/MD'

In [None]:
# GatorTron - MD 60%
# best paramters: {'batch_size': 4, 'epochs': 1, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0}
# {'test_loss': 3.801469326019287, 'test_f1': 0.5143589743589743, 'test_precision': 0.8222222222222223, 'test_recall': 0.5333333333333333, 'test_runtime': 7.6558, 'test_samples_per_second': 1.959, 'test_steps_per_second': 0.522}

# GatorTron - MD 80%
# best ps: {'batch_size': 4, 'epochs': 1, 'learning_rate': 5e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.1}
# {'test_loss': 2.1605989933013916, 'test_f1': 0.637593984962406, 'test_precision': 0.645925925925926, 'test_recall': 0.6666666666666666, 'test_runtime': 7.6978, 'test_samples_per_second': 1.949, 'test_steps_per_second': 0.52}

# Gatorton TA - MD 60%
# {'batch_size': 4, 'epochs': 4, 'learning_rate': 5e-06, 'warmup_ratio': 0, 'weight_decay': 0}
# 'test_loss': 2.4067065715789795, 'test_f1': 0.6928104575163399, 'test_precision': 0.7714285714285715, 'test_recall': 0.6666666666666666, 'test_runtime': 7.6263, 'test_samples_per_second': 1.967, 'test_steps_per_second': 0.524}

# Gatorton TA - MD 80%