# Evaluating results of final models

In [4]:
# !pip install evaluate sentence_transformers jiwer torchmetrics

In [6]:
import csv
import os

import pandas as pd

from evaluations.corpus_similarity import CorpusSimilarity

results_file = "results.csv"
translations_filepath = "translations/"

# Score every translation CSV found under `translations_filepath` with each
# CorpusSimilarity metric and write one row per file to `results_file`.
# Mode 'w' truncates the output file, so the header is always written exactly
# once at the top (no need to probe the file position first).
with open(results_file, 'w', newline='') as csv_file:
    # NOTE(review): the TER column name literally contains the single quotes
    # ("'ter'"). It is kept unchanged so the header of results.csv stays the
    # same -- confirm whether a plain 'ter' was intended.
    field_names = ['filename', 'sacrebleu', "'ter'", 'semsim']
    writer = csv.DictWriter(csv_file, fieldnames=field_names)
    writer.writeheader()

    for root, _dirs, files in os.walk(translations_filepath):
        for file_name in files:
            # Only CSV files can be parsed below; skip anything else that
            # happens to live in the directory tree instead of crashing.
            if not file_name.lower().endswith('.csv'):
                continue

            file_path = os.path.join(root, file_name)
            df = pd.read_csv(file_path)
            print(f"File: {file_path}")

            # os.walk already yields the bare file name, so there is no need
            # to split the joined path on '/' (which breaks when the platform
            # path separator is not '/').
            line = {'filename': file_name}

            # The reference/translation lists are the same for every metric,
            # so build them once per file rather than once per metric.
            references = df["reference"].tolist()
            translations = df["translation"].tolist()

            for metric in CorpusSimilarity:
                score = metric.evaluate(references, translations)
                print(f"\t{metric}: {score}")

                if metric == CorpusSimilarity.SACREBLEU:
                    line['sacrebleu'] = score
                elif metric == CorpusSimilarity.TER:
                    line["'ter'"] = score
                else:
                    # Any metric other than SACREBLEU/TER is recorded as the
                    # semantic-similarity score.
                    line['semsim'] = score

            writer.writerow(line)

File: translations/clinspen.csv
	SACREBLEU: 49.7
	TER: 60.4
	SEMANTIC_SIMILARITY: 93.1
File: translations/pubmed-te.csv
	SACREBLEU: 45.2
	TER: 54.8
	SEMANTIC_SIMILARITY: 92.2
File: translations/hpo.csv
	SACREBLEU: 47.3
	TER: 62.9
	SEMANTIC_SIMILARITY: 92.0
File: translations/orphanet-definitions-te.csv
	SACREBLEU: 61.0
	TER: 69.0
	SEMANTIC_SIMILARITY: 95.7
File: translations/khresmoi-te.csv
	SACREBLEU: 47.1
	TER: 61.7
	SEMANTIC_SIMILARITY: 94.7
