In [1]:
!pip install transformers sentencepiece



In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Move model to GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print("Model loaded on", device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Model loaded on cuda


In [4]:
!rm -rf bleurt
!pip install git+https://github.com/google-research/bleurt.git
!pip install tensorflow  # BLEURT requires TensorFlow

Collecting git+https://github.com/google-research/bleurt.git
  Cloning https://github.com/google-research/bleurt.git to /tmp/pip-req-build-dui3odwd
  Running command git clone --filter=blob:none --quiet https://github.com/google-research/bleurt.git /tmp/pip-req-build-dui3odwd
  Resolved https://github.com/google-research/bleurt.git to commit cebe7e6f996b40910cfaa520a63db47807e3bf5c
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [5]:
!wget https://storage.googleapis.com/bleurt-oss/bleurt-base-128.zip
!unzip bleurt-base-128.zip

--2025-04-23 22:37:31--  https://storage.googleapis.com/bleurt-oss/bleurt-base-128.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.170.207, 142.251.175.207, 74.125.24.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.170.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 405489453 (387M) [application/zip]
Saving to: ‘bleurt-base-128.zip.1’


2025-04-23 22:37:49 (21.2 MB/s) - ‘bleurt-base-128.zip.1’ saved [405489453/405489453]

Archive:  bleurt-base-128.zip
replace bleurt-base-128/vocab.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [6]:
!pip install -q sacrebleu

In [7]:
import json
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from bleurt import score as bleurt_score
import sacrebleu

# Load model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print(f"Model loaded on {device}")

# ----- CONFIGURATION -----
input_file = "ne_sin_devtest.jsonl"           # or sin_ne_devtest.jsonl
output_file = "direct_predicted_si_devtest.txt"
src_lang = "npi_Deva"                          # or "sin_Sinh" if reversed
tgt_lang = "sin_Sinh"                          # or "npi_Deva" if reversed
checkpoint = "bleurt-base-128"
# --------------------------

# Load data
src_sents, tgt_refs = [], []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        data = json.loads(line)
        src_sents.append(data["src"])
        tgt_refs.append(data["tgt"])
print(f"Loaded {len(src_sents)} sentence pairs from {input_file}")

# Translate in batches
def translate_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang):
    tokenizer.src_lang = src_lang
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    outputs = model.generate(**inputs, forced_bos_token_id=bos_token_id, max_length=512)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

predictions = []
batch_size = 8
for i in tqdm(range(0, len(src_sents), batch_size), desc="Translating"):
    batch = src_sents[i:i+batch_size]
    predictions.extend(translate_batch(batch))

# Save output
with open(output_file, "w", encoding="utf-8") as f:
    for line in predictions:
        f.write(line.strip() + "\n")
print(f"Saved predictions to {output_file}")

# ----- Evaluation -----
# BLEURT
scorer = bleurt_score.BleurtScorer(checkpoint)
bleurt_scores = scorer.score(references=tgt_refs, candidates=predictions)
avg_bleurt = sum(bleurt_scores) / len(bleurt_scores)
print(f"BLEURT score: {avg_bleurt:.4f}")

# BLEU & ChrF
bleu = sacrebleu.corpus_bleu(predictions, [tgt_refs]).score
chrf = sacrebleu.corpus_chrf(predictions, [tgt_refs]).score
print(f"SacreBLEU:     {bleu:.2f}")
print(f"ChrF:          {chrf:.2f}")


Model loaded on cuda
Loaded 1012 sentence pairs from ne_sin_devtest.jsonl


Translating: 100%|██████████| 127/127 [04:46<00:00,  2.26s/it]


Saved predictions to direct_predicted_si_devtest.txt
BLEURT score: 0.4369
SacreBLEU:     9.47
ChrF:          41.07


In [8]:
import json
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from bleurt import score as bleurt_score
import sacrebleu

# Load model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print(f"Model loaded on {device}")

# ----- CONFIGURATION -----
input_file = "ne_sin_dev.jsonl"           # or sin_ne_devtest.jsonl
output_file = "direct_predicted_si_dev.txt"
src_lang = "npi_Deva"                          # or "sin_Sinh" if reversed
tgt_lang = "sin_Sinh"                          # or "npi_Deva" if reversed
checkpoint = "bleurt-base-128"
# --------------------------

# Load data
src_sents, tgt_refs = [], []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        data = json.loads(line)
        src_sents.append(data["src"])
        tgt_refs.append(data["tgt"])
print(f"Loaded {len(src_sents)} sentence pairs from {input_file}")

# Translate in batches
def translate_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang):
    tokenizer.src_lang = src_lang
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    outputs = model.generate(**inputs, forced_bos_token_id=bos_token_id, max_length=512)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

predictions = []
batch_size = 8
for i in tqdm(range(0, len(src_sents), batch_size), desc="Translating"):
    batch = src_sents[i:i+batch_size]
    predictions.extend(translate_batch(batch))

# Save output
with open(output_file, "w", encoding="utf-8") as f:
    for line in predictions:
        f.write(line.strip() + "\n")
print(f"Saved predictions to {output_file}")

# ----- Evaluation -----
# BLEURT
scorer = bleurt_score.BleurtScorer(checkpoint)
bleurt_scores = scorer.score(references=tgt_refs, candidates=predictions)
avg_bleurt = sum(bleurt_scores) / len(bleurt_scores)
print(f"BLEURT score: {avg_bleurt:.4f}")

# BLEU & ChrF
bleu = sacrebleu.corpus_bleu(predictions, [tgt_refs]).score
chrf = sacrebleu.corpus_chrf(predictions, [tgt_refs]).score
print(f"SacreBLEU:     {bleu:.2f}")
print(f"ChrF:          {chrf:.2f}")


Model loaded on cuda
Loaded 997 sentence pairs from ne_sin_dev.jsonl


Translating: 100%|██████████| 125/125 [05:06<00:00,  2.45s/it]


Saved predictions to direct_predicted_si_dev.txt
BLEURT score: 0.4267
SacreBLEU:     8.92
ChrF:          40.66


In [9]:
import json
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from bleurt import score as bleurt_score
import sacrebleu

# Load model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print(f"Model loaded on {device}")

# ----- CONFIGURATION -----
input_file = "sin_ne_devtest.jsonl"           # or sin_ne_devtest.jsonl
output_file = "direct_predicted_ne_devtest.txt"
src_lang = "sin_Sinh"   # Input is Sinhala
tgt_lang = "npi_Deva"   # Output should be Nepali
checkpoint = "bleurt-base-128"
# --------------------------

# Load data
src_sents, tgt_refs = [], []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        data = json.loads(line)
        src_sents.append(data["src"])
        tgt_refs.append(data["tgt"])
print(f"Loaded {len(src_sents)} sentence pairs from {input_file}")

# Translate in batches
def translate_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang):
    tokenizer.src_lang = src_lang
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    outputs = model.generate(**inputs, forced_bos_token_id=bos_token_id, max_length=512)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

predictions = []
batch_size = 8
for i in tqdm(range(0, len(src_sents), batch_size), desc="Translating"):
    batch = src_sents[i:i+batch_size]
    predictions.extend(translate_batch(batch))

# Save output
with open(output_file, "w", encoding="utf-8") as f:
    for line in predictions:
        f.write(line.strip() + "\n")
print(f"Saved predictions to {output_file}")

# ----- Evaluation -----
# BLEURT
scorer = bleurt_score.BleurtScorer(checkpoint)
bleurt_scores = scorer.score(references=tgt_refs, candidates=predictions)
avg_bleurt = sum(bleurt_scores) / len(bleurt_scores)
print(f"BLEURT score: {avg_bleurt:.4f}")

# BLEU & ChrF
bleu = sacrebleu.corpus_bleu(predictions, [tgt_refs]).score
chrf = sacrebleu.corpus_chrf(predictions, [tgt_refs]).score
print(f"SacreBLEU:     {bleu:.2f}")
print(f"ChrF:          {chrf:.2f}")


Model loaded on cuda
Loaded 1012 sentence pairs from sin_ne_devtest.jsonl


Translating: 100%|██████████| 127/127 [03:21<00:00,  1.59s/it]


Saved predictions to direct_predicted_ne_devtest.txt
BLEURT score: 0.0924
SacreBLEU:     8.44
ChrF:          42.26


In [10]:
import json
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from bleurt import score as bleurt_score
import sacrebleu

# Load model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print(f"Model loaded on {device}")

# ----- CONFIGURATION -----
input_file = "sin_ne_dev.jsonl"           # or sin_ne_devtest.jsonl
output_file = "direct_predicted_ne_dev.txt"
src_lang = "sin_Sinh"   # Input is Sinhala
tgt_lang = "npi_Deva"   # Output should be Nepali
checkpoint = "bleurt-base-128"
# --------------------------

# Load data
src_sents, tgt_refs = [], []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        data = json.loads(line)
        src_sents.append(data["src"])
        tgt_refs.append(data["tgt"])
print(f"Loaded {len(src_sents)} sentence pairs from {input_file}")

# Translate in batches
def translate_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang):
    tokenizer.src_lang = src_lang
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    outputs = model.generate(**inputs, forced_bos_token_id=bos_token_id, max_length=512)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

predictions = []
batch_size = 8
for i in tqdm(range(0, len(src_sents), batch_size), desc="Translating"):
    batch = src_sents[i:i+batch_size]
    predictions.extend(translate_batch(batch))

# Save output
with open(output_file, "w", encoding="utf-8") as f:
    for line in predictions:
        f.write(line.strip() + "\n")
print(f"Saved predictions to {output_file}")

# ----- Evaluation -----
# BLEURT
scorer = bleurt_score.BleurtScorer(checkpoint)
bleurt_scores = scorer.score(references=tgt_refs, candidates=predictions)
avg_bleurt = sum(bleurt_scores) / len(bleurt_scores)
print(f"BLEURT score: {avg_bleurt:.4f}")

# BLEU & ChrF
bleu = sacrebleu.corpus_bleu(predictions, [tgt_refs]).score
chrf = sacrebleu.corpus_chrf(predictions, [tgt_refs]).score
print(f"SacreBLEU:     {bleu:.2f}")
print(f"ChrF:          {chrf:.2f}")


Model loaded on cuda
Loaded 997 sentence pairs from sin_ne_dev.jsonl


Translating: 100%|██████████| 125/125 [02:44<00:00,  1.32s/it]


Saved predictions to direct_predicted_ne_dev.txt
BLEURT score: 0.1012
SacreBLEU:     8.43
ChrF:          43.42
