<a href="https://colab.research.google.com/github/wesslen/seamless_sacrebleu_evaluation/blob/main/notebooks/seamless_sacrebleu_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install transformers sacrebleu tqdm torch

In [2]:
# Import required libraries
import torch
from transformers import SeamlessM4Tv2Model, AutoProcessor
from sacrebleu.metrics import BLEU
from typing import List, Union, Optional
import tqdm

class TranslationEvaluator:
    def __init__(self, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
        """
        Initialize the translation evaluator with the Seamless model.

        Args:
            device: Device to run the model on ("cuda" or "cpu")
        """
        print(f"Using device: {device}")
        self.device = device
        print("Loading model and processor...")
        self.processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
        self.model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
        self.bleu = BLEU()
        print("Setup complete!")

    def translate_batch(self, texts: List[str], src_lang: str, tgt_lang: str, batch_size: int = 8) -> List[str]:
        """
        Translate a list of texts in batches.

        Args:
            texts: List of source texts to translate
            src_lang: Source language code (e.g., "eng", "fra")
            tgt_lang: Target language code
            batch_size: Batch size for translation

        Returns:
            List of translated texts
        """
        translations = []

        # Process in batches
        for i in tqdm.trange(0, len(texts), batch_size, desc="Translating"):
            batch = texts[i:i + batch_size]

            # Process input
            text_inputs = self.processor(
                text=batch,
                src_lang=src_lang,
                return_tensors="pt"
            ).to(self.device)

            # Generate translations
            with torch.no_grad():
                output_tokens = self.model.generate(
                    **text_inputs,
                    tgt_lang=tgt_lang,
                    generate_speech=False
                )

            # Decode translations
            # print(len(output_tokens))
            # print(len(output_tokens[0]))
            # print(len(output_tokens[0][0]))
            # print(output_tokens[0][0].tolist())
            # print(output_tokens[0].tolist())
            # print(output_tokens[0].tolist()[0])

            batch_translations = [
                self.processor.decode(tokens, skip_special_tokens=True)
                for tokens in output_tokens[0].tolist()
            ]
            translations.extend(batch_translations)

        return translations

    def evaluate_translations(
        self,
        hypotheses: List[str],
        references: Union[List[str], List[List[str]]],
        verbose: bool = True
    ) -> BLEU:
        """
        Evaluate translations using sacreBLEU.

        Args:
            hypotheses: List of system outputs (translations)
            references: List of reference translations. For multiple references,
                       provide a list of lists where each inner list contains
                       all references for one source sentence
            verbose: Whether to print the BLEU score

        Returns:
            BLEU score object
        """
        # Handle single reference case
        if isinstance(references[0], str):
            references = [references]

        # Calculate BLEU score
        bleu_score = self.bleu.corpus_score(hypotheses, references)

        if verbose:
            print(f"BLEU score: {bleu_score.score:.2f}")
            print(f"Signature: {self.bleu.get_signature()}")

        return bleu_score

# Example usage
if __name__ == "__main__":
    # Test data
    source_texts = [
        "Hello, my dog is cute",
        "The weather is nice today",
        "I love programming"
    ]

    # Reference translations in French
    references = [
        ["Bonjour, mon chien est mignon"],
        ["Le temps est beau aujourd'hui"],
        ["J'aime la programmation"]
    ]

    print("Initializing translator...")
    # Initialize evaluator
    evaluator = TranslationEvaluator()

    print("\nTranslating texts...")
    # Translate texts
    translations = evaluator.translate_batch(
        texts=source_texts,
        src_lang="eng",
        tgt_lang="fra"
    )

    print("\nEvaluating translations...")
    # Evaluate translations
    bleu_score = evaluator.evaluate_translations(
        hypotheses=translations,
        references=references
    )

    # Print detailed results
    print("\nDetailed Results:")
    print("-" * 50)
    for src, hyp, ref in zip(source_texts, translations, references):
        print(f"\nSource: {src}")
        print(f"System: {hyp}")
        print(f"Reference: {ref[0]}")

Initializing translator...
Using device: cuda
Loading model and processor...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setup complete!

Translating texts...


Translating: 100%|██████████| 1/1 [00:01<00:00,  1.25s/it]


Evaluating translations...
BLEU score: 75.98
Signature: nrefs:3|case:mixed|eff:no|tok:13a|smooth:exp|version:2.4.3

Detailed Results:
--------------------------------------------------

Source: Hello, my dog is cute
System: Salut, mon chien est mignon
Reference: Bonjour, mon chien est mignon

Source: The weather is nice today
System: Le temps est beau aujourd'hui.
Reference: Le temps est beau aujourd'hui

Source: I love programming
System: J'adore la programmation
Reference: J'aime la programmation





## Evaluation

In [3]:
!wget https://github.com/wesslen/seamless_sacrebleu_evaluation/raw/main/data/01-english-spanish-mapping.jsonl
!wget https://github.com/wesslen/seamless_sacrebleu_evaluation/raw/main/data/02-english-spanish-mapping.jsonl

--2024-11-12 20:58:17--  https://github.com/wesslen/seamless_sacrebleu_evaluation/raw/main/data/01-english-spanish-mapping.jsonl
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/wesslen/seamless_sacrebleu_evaluation/main/data/01-english-spanish-mapping.jsonl [following]
--2024-11-12 20:58:17--  https://raw.githubusercontent.com/wesslen/seamless_sacrebleu_evaluation/main/data/01-english-spanish-mapping.jsonl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2829 (2.8K) [text/plain]
Saving to: ‘01-english-spanish-mapping.jsonl.1’


2024-11-12 20:58:17 (33.6 MB/s) - ‘01-english-spanish-mapping.jsonl.1

In [4]:
import json
from typing import List, Dict

def read_jsonl(file_path: str) -> List[Dict]:
    """
    Read a JSONL file and return its contents as a list of dictionaries.

    Args:
        file_path (str): Path to the JSONL file

    Returns:
        List[Dict]: List of dictionaries containing the parsed JSONL data
    """
    data = []
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                try:
                    # Parse each line as a JSON object
                    json_obj = json.loads(line.strip())
                    data.append(json_obj)
                except json.JSONDecodeError as e:
                    print(f"Error parsing line: {e}")
                    continue
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"An error occurred: {e}")

    return data

mapping_01 = read_jsonl("01-english-spanish-mapping.jsonl")
mapping_02 = read_jsonl("02-english-spanish-mapping.jsonl")

In [5]:
# Test data
source_texts_01 = [item["source_text"] for item in mapping_01]
source_texts_02 = [item["source_text"] for item in mapping_02]

# Reference translations in French
references_01 = [item["references"] for item in mapping_01]
references_02 = [item["references"] for item in mapping_02]

print("Initializing translator...")
# Initialize evaluator
evaluator = TranslationEvaluator()

print("\nTranslating texts...")
# Translate texts
translations_01 = evaluator.translate_batch(
    texts=source_texts_01,
    src_lang="eng",
    tgt_lang="spa"
)

print("\nEvaluating translations...")
# Evaluate translations
bleu_score_01 = evaluator.evaluate_translations(
    hypotheses=translations_01,
    references=references_01
)

Initializing translator...
Using device: cuda
Loading model and processor...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setup complete!

Translating texts...


Translating: 100%|██████████| 4/4 [00:02<00:00,  1.44it/s]


Evaluating translations...
BLEU score: 42.73
Signature: nrefs:31|case:mixed|eff:no|tok:13a|smooth:exp|version:2.4.3





In [6]:
# Print detailed results
print("\nDetailed Results:")
print("-" * 50)
for src, hyp, ref in zip(source_texts_01, translations_01, references_01):
    print(f"\nSource: {src}")
    print(f"System: {hyp}")
    print(f"Reference: {ref[0]}")


Detailed Results:
--------------------------------------------------

Source: Wells Fargo Bank | Financial Services & Online Banking
System: Wells Fargo Bank | Servicios financieros y banca en línea
Reference: Wells Fargo Bank | Servicios Financieros y Banca por Internet

Source: How can we help?
System: ¿Cómo podemos ayudar?
Reference: ¿Cómo podemos ayudarle?

Source: Popular FAQs
System: Preguntas frecuentes populares
Reference: Preguntas frecuentes populares

Source: How do I find my routing and account numbers?
System: ¿Cómo encuentro mi enrutamiento y números de cuenta?
Reference: ¿Cómo puedo encontrar mis números de ruta y de cuenta?

Source: Is there a fee for Zelle®?
System: ¿Hay una cuota para Zelle?
Reference: ¿Se aplica algún cargo por usar Zelle®?

Source: How do I report suspected fraud?
System: ¿Cómo puedo informar de sospecha de fraude?
Reference: ¿Qué debo hacer para reportar una sospecha de fraude?

Source: See more FAQs
System: Vea más preguntas y respuestas
Referenc

In [7]:
print("Initializing translator...")
# Initialize evaluator
evaluator = TranslationEvaluator()

print("\nTranslating texts...")
# Translate texts
translations_02 = evaluator.translate_batch(
    texts=source_texts_02,
    src_lang="eng",
    tgt_lang="spa"
)

print("\nEvaluating translations...")
# Evaluate translations
bleu_score_02 = evaluator.evaluate_translations(
    hypotheses=translations_02,
    references=references_02
)

Initializing translator...
Using device: cuda
Loading model and processor...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setup complete!

Translating texts...


Translating: 100%|██████████| 12/12 [00:12<00:00,  1.05s/it]


Evaluating translations...
BLEU score: 10.68
Signature: nrefs:96|case:mixed|eff:no|tok:13a|smooth:exp|version:2.4.3





In [8]:
# Print detailed results
print("\nDetailed Results:")
print("-" * 50)
for src, hyp, ref in zip(source_texts_02, translations_02, references_02):
    print(f"\nSource: {src}")
    print(f"System: {hyp}")
    print(f"Reference: {ref[0]}")


Detailed Results:
--------------------------------------------------

Source: Say hello to convenient checking
System: Dile hola a la comprobación conveniente
Reference: Hola a una cuenta de cheques conveniente

Source: Explore our checking options and choose the right account for you
System: Explore nuestras opciones de comprobación y elija la cuenta adecuada para usted
Reference: Explore nuestras opciones de cuentas de cheques y elija la adecuada para usted

Source: Get started
System: Empieza ahora.
Reference: Para comenzar

Source: Find mortgage happiness
System: Encuentra la felicidad de la hipoteca
Reference: Sienta alegría hipotecaria

Source: With a down payment as low as 3%
System: Con un pago inicial tan bajo como el 3%
Reference: Con un pago inicial bajo, desde tan solo el 3%

Source: Learn more
System: Más información
Reference: Más información

Source: Unlock convenient checking
System: Desbloqueo de comprobación conveniente
Reference: Acceda una cuenta de cheques conveni

## Libraries

In [9]:
# Now check versions
import pkg_resources
import sys

def get_package_details():
    """Print details of specific packages and Python version"""
    packages_to_check = [
        'torch',
        'transformers',
        'sacrebleu',
        'tqdm',
        'numpy',
        'sentencepiece'  # Often used by transformers
    ]

    print("Python version:", sys.version.split()[0])
    print("\nPackage versions:")
    print("-" * 50)

    for package in packages_to_check:
        try:
            version = pkg_resources.get_distribution(package).version
            print(f"{package:<15} {version}")
        except pkg_resources.DistributionNotFound:
            print(f"{package:<15} Not installed")

# Check CUDA availability for PyTorch
import torch
print("\nCUDA Status:")
print("-" * 50)
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Current GPU: {torch.cuda.get_device_name()}")

# Run the check
get_package_details()

  import pkg_resources



CUDA Status:
--------------------------------------------------
CUDA available: True
CUDA version: 12.1
Current GPU: NVIDIA A100-SXM4-40GB
Python version: 3.10.12

Package versions:
--------------------------------------------------
torch           2.5.0+cu121
transformers    4.46.2
sacrebleu       2.4.3
tqdm            4.66.6
numpy           1.26.4
sentencepiece   0.2.0
