# Evaluating a Model Checkpoint

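This notebook demonstrates how to evaluate a UniRel model checkpoint trained on the NYT dataset: it builds the test set, loads the checkpoint, runs prediction, and reports precision, recall, and F1.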

## Imports

In [1]:
from transformers import BertTokenizerFast, Trainer
from dataprocess.data_extractor import unirel_span_extractor
from dataprocess.data_processor import UniRelDataProcessor
from dataprocess.dataset import UniRelSpanDataset
from model.model_transformers import UniRelModel

## Test Dataset

In [2]:
# Sizes used in this cell, named so they are easy to find and adjust.
NUM_UNUSED_TOKENS = 16  # the strings "[unused1]" .. "[unused16]" are registered
MAX_TEXT_LENGTH = 150   # presumably the per-sentence token budget — TODO confirm

# Register the placeholder strings as additional special tokens of the
# "bert-base-cased" tokenizer.
added_token = [f"[unused{i}]" for i in range(1, NUM_UNUSED_TOKENS + 1)]
# NOTE(review): do_basic_tokenize is documented for the slow BertTokenizer;
# confirm it has any effect when passed to BertTokenizerFast.
tokenizer = BertTokenizerFast.from_pretrained(
    "bert-base-cased",
    additional_special_tokens=added_token,
    do_basic_tokenize=False,
)

# The processor is pointed at the "data" root and the "nyt" dataset; the test
# samples are then taken from it.
data_processor = UniRelDataProcessor(
    root="data",
    tokenizer=tokenizer,
    dataset_name="nyt",
)
test_samples = data_processor.get_test_sample()

# Keyword options for the dataset wrapper, gathered in one place.
dataset_options = {
    "mode": "test",
    "ignore_label": -100,
    "model_type": "bert",
    "ngram_dict": None,
    # The two extra positions are presumably for [CLS] and [SEP] — TODO confirm.
    "max_length": MAX_TEXT_LENGTH + 2,
    "predict": True,
    "eval_type": "test",
}
test_dataset = UniRelSpanDataset(
    test_samples,
    data_processor,
    tokenizer,
    **dataset_options,
)

# Show how many examples the test dataset holds.
print(len(test_dataset))

100%|██████████| 5000/5000 [00:03<00:00, 1531.40it/s]

139
more than 100: 49
more than 150: 0
5000





## Load Model

In [3]:
# Location of the saved checkpoint to evaluate, given relative to the
# notebook's working directory.
checkpoint = "./output/nyt/checkpoint-final"
# Build the model from that checkpoint.
# NOTE(review): assumes UniRelModel follows the Hugging Face `from_pretrained`
# convention (weights + config loaded from the directory) — confirm in
# model/model_transformers.py.
model = UniRelModel.from_pretrained(checkpoint)

## Predictions

In [4]:
# Only the model is handed to the Trainer, so every other Trainer setting
# keeps its library default.
trainer = Trainer(model=model)

# Run inference over the whole test dataset. The "loss" entry of the model
# output is excluded from the gathered predictions.
test_prediction = trainer.predict(
    test_dataset=test_dataset,
    ignore_keys=["loss"],
)



## Metrics

In [5]:
# Run the span extractor over the predictions for the test dataset.
# "./output/nyt" is presumably where the extractor writes its results —
# TODO confirm in dataprocess/data_extractor.py.
metrics = unirel_span_extractor(
    predictions=test_prediction,
    dataset=test_dataset,
    tokenizer=tokenizer,
    path="./output/nyt",
)

# Keep the returned value as the cell's last expression so it is displayed.
# Judging by the recorded output it is a (precision, recall, F1) tuple.
metrics


all:  {'p': 8057, 'c': 7548, 'g': 8120} 
 {'all-prec': 0.9368251210127839, 'all-recall': 0.9295566502463054, 'all-f1': 0.9331767323978487}


(0.9368251210127839, 0.9295566502463054, 0.9331767323978487)