In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Select the compute device: a CUDA GPU when one is present, the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was selected.
if device.type == "cuda":
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

In [None]:
# Restore the tokenizer and the fine-tuned T5 weights from the same local
# checkpoint directory; the two loads are independent of each other.
tokenizer = T5Tokenizer.from_pretrained("fine_tuned_t5_vi_en")
model = T5ForConditionalGeneration.from_pretrained("fine_tuned_t5_vi_en")

# Place the model's parameters on the device chosen earlier (GPU or CPU).
model.to(device)

GPU is available. Using GPU for translation.


In [None]:
# Function to translate Vietnamese to English using the fine-tuned model
def translate_text(
    input_text,
    model,
    tokenizer,
    device,
    *,
    task_prefix="translate Vietnamese to English: ",
    max_length=256,
    num_beams=5,
    repetition_penalty=2.5,
    length_penalty=1.0,
    early_stopping=True,
):
    """Translate a single piece of text and return the decoded output string.

    The text is prepended with ``task_prefix``, encoded with ``tokenizer``,
    moved to ``device``, and passed to ``model.generate``. The first generated
    sequence is decoded with special tokens skipped.

    Args:
        input_text: Source text to translate.
        model: Object exposing ``eval()`` and ``generate(input_ids, **kwargs)``
            (in this notebook, the fine-tuned T5 model).
        tokenizer: Object exposing ``encode(text, return_tensors=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        device: Device the encoded input is moved to; it should be the device
            the model already lives on.
        task_prefix: Instruction prepended to ``input_text`` before encoding.
        max_length: Forwarded to ``model.generate``.
        num_beams: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
            NOTE(review): the default of 2.5 is inherited from the original
            hard-coded call -- confirm it was tuned for this checkpoint.
        length_penalty: Forwarded to ``model.generate``.
        early_stopping: Forwarded to ``model.generate``.

    Returns:
        The decoded text of the first generated sequence.
    """
    # Build the prompt and encode it as a "pt" tensor on the target device.
    prompt = f"{task_prefix}{input_text}"
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Evaluation mode + no_grad: inference only, no gradient tracking needed.
    model.eval()
    with torch.no_grad():
        translation_ids = model.generate(
            input_ids,
            max_length=max_length,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            early_stopping=early_stopping,
        )

    # Only the first returned sequence is decoded.
    return tokenizer.decode(translation_ids[0], skip_special_tokens=True)

In [None]:
# Smoke-test the translator on one unseen Vietnamese sentence.
src_sentence = "Theo lãnh đạo Sở Y tế, 3 người này không có triệu chứng sốt, ho, khó thở, đã được lấy mẫu xét nghiệm và cách ly tập trung."
translated_sentence = translate_text(
    input_text=src_sentence,
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# Show the source sentence and its translation one after the other.
print("Original Vietnamese Sentence: ", src_sentence)
print("Translated English Sentence: ", translated_sentence)

Generated tokens: tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0