# Running pre-trained models on biomedical texts to evaluate them
## Infrastructure

In [None]:
!pip install evaluate
!pip install nltk
!pip install sentence_transformers

from enum import Enum

import evaluate
import nltk
from sentence_transformers import SentenceTransformer, util


class SimilarityMetric(Enum):
    """String-similarity metrics for scoring candidate texts against references.

    Every member is scored through the shared ``evaluate`` method, which
    dispatches on the member it is called on.
    """
    SACREBLEU = 0
    SEMANTIC_SIMILARITY = 1
    EDIT_DISTANCE = 2

    def evaluate(self, references: list[str], candidates: list[str]) -> float:
        """Evaluate this similarity metric between two parallel corpora.

        The strings are scored exactly as given; no cleaning or normalisation
        is applied by this method.

        :param references: list of references (official translations)
        :param candidates: list of candidates (model translations), aligned
            item-by-item with ``references``
        :return: for ``SACREBLEU``, the ``score`` field reported by the
            ``sacrebleu`` metric, rounded to 1 decimal place; for the other
            metrics, the mean per-pair score rounded to 3 decimal places
        :raises ValueError: if the corpora are empty or differ in length
        """
        n = len(references)
        if n != len(candidates):
            # zip() would silently drop the unmatched tail while the mean is
            # still divided by len(references), producing a wrong score.
            raise ValueError(
                f"references and candidates must have the same length "
                f"(got {n} and {len(candidates)})"
            )
        if n == 0:
            raise ValueError("cannot evaluate a similarity metric on empty corpora")

        if self is SimilarityMetric.SACREBLEU:
            # sacrebleu expects a list of references for each candidate
            wrapped_references = [[ref] for ref in references]
            sacrebleu = evaluate.load("sacrebleu")
            results = sacrebleu.compute(
                predictions=list(candidates), references=wrapped_references
            )
            return round(results["score"], 1)

        if self is SimilarityMetric.EDIT_DISTANCE:
            total = 0.0
            for reference, candidate in zip(references, candidates):
                longest = max(len(reference), len(candidate))
                if longest == 0:
                    # Two empty strings are identical; score them as a perfect
                    # match instead of dividing by a zero length.
                    total += 1.0
                    continue
                # Normalise the edit distance by the longer string so each
                # pair contributes a similarity in [0, 1].
                total += 1 - nltk.edit_distance(reference, candidate) / longest
            return round(total / n, 3)

        # Remaining member: SEMANTIC_SIMILARITY (cosine similarity of sentence embeddings).
        similarity_model = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")
        total = 0.0
        for reference, candidate in zip(references, candidates):
            reference_embedding = similarity_model.encode(reference)
            candidate_embedding = similarity_model.encode(candidate)
            cosine_similarity = util.cos_sim(reference_embedding, candidate_embedding)
            total += cosine_similarity[0].item()
        return round(total / n, 3)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m


In [24]:
import pandas as pd
import os

# Score every translation CSV found under ./translations with each similarity metric.
folder_path = os.path.join(os.getcwd(), "translations")

for root, _dirs, files in os.walk(folder_path):
    for file_name in files:
        # os.walk yields every file in the tree; skip non-CSV entries
        # (e.g. macOS ".DS_Store") that pd.read_csv cannot parse.
        if not file_name.lower().endswith(".csv"):
            continue

        file_path = os.path.join(root, file_name)
        df = pd.read_csv(file_path)
        print(f"File: {file_path}")

        # Convert the two columns once instead of once per metric.
        references = df["Reference"].tolist()
        candidates = df["Actual"].tolist()

        for metric in SimilarityMetric:
            score = metric.evaluate(references, candidates)
            print(f"\t{metric.name}: {score}")


File: /Users/zaki/PycharmProjects/hpo_translation/pretrained_models/preliminary_experiments/translations/abstract5/facebook.csv
	SACREBLEU: 18.7
	SEMANTIC_SIMILARITY: 0.8756338953971863
	EDIT_DISTANCE: 0.4411531474040361
File: /Users/zaki/PycharmProjects/hpo_translation/pretrained_models/preliminary_experiments/translations/abstract5/Helsinki-NLP.csv
	SACREBLEU: 34.2
	SEMANTIC_SIMILARITY: 0.951473867893219
	EDIT_DISTANCE: 0.6284120710816267
File: /Users/zaki/PycharmProjects/hpo_translation/pretrained_models/preliminary_experiments/translations/abstract5/t5-base.csv
	SACREBLEU: 0.1
	SEMANTIC_SIMILARITY: 0.7258614778518677
	EDIT_DISTANCE: 0.18300006923955556
