In [91]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` is on PATH.
%pip install -q evaluate rouge_score



In [92]:
import evaluate

In [93]:
def rouge_scores(predicitions: list, references:list):
    """
    Score generated texts against reference texts with ROUGE.

    The "rouge" metric is loaded through Hugging Face's `evaluate`
    library on every call, and the result of its `compute` method is
    returned unchanged.

    Args:
        predicitions (list): Generated text strings to be scored.
        references (list): Reference text strings the predictions are
            compared against.

    Returns:
        dict: The metric's result mapping (in this notebook it holds the
              keys 'rouge1', 'rouge2', 'rougeL' and 'rougeLsum').
    """
    metric = evaluate.load("rouge")
    scores = metric.compute(references=references, predictions=predicitions)
    return scores

def bleu_scores(predicitions: list, references:list, smooth: bool = True, max_order: int = 4):
    """
    Compute BLEU scores between predicted texts and reference texts using
    Hugging Face's `evaluate` library, and flatten the per-order n-gram
    precisions into individual dictionary entries.

    Args:
        predicitions (list): A list of generated text strings.
        references (list): A list of reference text strings.
        smooth (bool): Forwarded to the metric's `smooth` option. Defaults
            to True to avoid reporting a score of 0 when there is no
            high-order n-gram overlap (such as 4-grams).
        max_order (int): Forwarded to the metric's `max_order` option,
            i.e. the highest n-gram order that is scored. Defaults to 4,
            which yields precisions for 1- to 4-grams.

    Returns:
        dict: The metric's result (BLEU score, brevity penalty and related
              statistics) where the 'precisions' list is replaced by one
              'precision_<n>_grams' entry per n-gram order.
    """
    bleu = evaluate.load("bleu")
    res = bleu.compute(
        predictions=predicitions,
        references=references,
        max_order=max_order,
        smooth=smooth,
    )
    # Expand the list of precisions into one flat key per n-gram order so the
    # result is a plain mapping of scalars.
    precisions = res.pop('precisions')
    for order, value in enumerate(precisions, start=1):
        res[f'precision_{order}_grams'] = value
    return res

In [97]:
# Alternative short example pairs. To try one, uncomment it and comment out
# the active assignments further down.
# prd, ref = ["The cat sat on the mat"], ["The cat is on the mat"]
# prd, ref = ["Hi, the weather is nice today"], ["Hi, can I help you with anything today?"]

# Active example: a generated incident summary (prd) and its reference (ref).
# Each is a single-element list of strings.
prd = ["On June 12, 2025, at 2:40 PM, Emily Zhang, Line Operator, observed contaminated gloves during aseptic filling in Grade A Filling Line, Sterile Suite A. The incident was caused by Emily's unawareness of the non-sterile surface touched during setup. The line was stopped, gloves were changed, and vials were quarantined. An incident log was also created."]
ref = ["On June 12, 2025, at 2:40 PM, during aseptic filling in Sterile Suite A, Emily Zhang contaminated her gloves by contacting a non-sterile surface unknowingly. The line was immediately stopped, gloves replaced, affected vials quarantined, and the event logged for review."]

In [98]:
# NOTE(review): the output saved with this cell appears to come from the short
# "cat" example pair rather than the active incident-report pair; re-run to refresh.
rouge_scores(predicitions=prd, references=ref)

{'rouge1': np.float64(0.8333333333333334),
 'rouge2': np.float64(0.6),
 'rougeL': np.float64(0.8333333333333334),
 'rougeLsum': np.float64(0.8333333333333334)}

In [99]:
# NOTE(review): the output saved with this cell reports translation_length 6,
# which cannot correspond to the multi-sentence incident-report pair; it looks
# stale (short "cat" example). Re-run to refresh.
bleu_scores(predicitions=prd, references=ref)

{'bleu': 0.488923022434901,
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 6,
 'reference_length': 6,
 'precision_1_grams': 0.8571428571428571,
 'precision_2_grams': 0.6666666666666666,
 'precision_3_grams': 0.4,
 'precision_4_grams': 0.25}