<a href="https://colab.research.google.com/github/ymoslem/Adaptive-MT-LLM/blob/main/MT/GoogleTranslate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google Cloud Translation API

0. [Overview and Documentation](https://cloud.google.com/translate)
1. [Select or create a Cloud Platform project](https://console.cloud.google.com/cloud-resource-manager).
2. [Enable billing for your project](https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project).
3. [Enable the Cloud Translation API](https://console.cloud.google.com/apis/library/translate.googleapis.com).
4. Set up authentication (see the next two cells)

In [None]:
# Install the client libraries into the kernel's own environment (%pip, not !pip3).
# google-cloud-translate provides `google.cloud.translate_v2`, which is imported below.
%pip install -q google-auth google-cloud-translate tqdm

In [None]:
# Run these commands for authentication.
# Both are shell commands executed through the notebook (`!` prefix).
# NOTE(review): `gcloud auth application-default login` presumably stores the
# credentials that `translate.Client()` later picks up — confirm against the
# gcloud documentation.
!gcloud init --console-only --skip-diagnostics
!gcloud auth application-default login

In [None]:
from google.cloud import translate_v2 as translate

def translate_multiple_sentences(target, text_list, model="nmt", client=None):
    """Translate a batch of sentences with the Cloud Translation (v2) client.

    Args:
        target: Language code to translate into, e.g. "ar" or "es".
        text_list: Sentences to translate. A single string is also accepted
            and treated as a one-element batch.
        model: Translation model name, passed through to the API.
        client: Optional, already-constructed ``translate.Client`` (or any
            object exposing a compatible ``translate`` method). When omitted,
            a new client is created for this call.

    Returns:
        A list of translated strings, in the same order as ``text_list``.
    """
    # For a bare string the client returns a single dict instead of a list,
    # which would break the comprehension below, so normalise to a list first.
    if isinstance(text_list, str):
        text_list = [text_list]
    else:
        text_list = list(text_list)

    # Nothing to translate: skip the API round trip entirely.
    if not text_list:
        return []

    translate_client = client if client is not None else translate.Client()

    # format_="text" marks the input as plain text. With the API's HTML
    # default, characters such as apostrophes and quotes come back as HTML
    # entities (e.g. "&#39;"), which would corrupt the saved translations.
    result = translate_client.translate(
        text_list, target_language=target, format_="text", model=model
    )

    return [output["translatedText"] for output in result]

In [None]:
# `target` expects an ISO 639-1 language code such as "ar", "en", or "fr".
# Supported codes are listed at:
# https://cloud.google.com/translate/docs/basic/discovering-supported-languages#translate_list_codes-python

sample_sentences = ["Hello World!", "What do you think?"]
translation = translate_multiple_sentences(target="ar", text_list=sample_sentences)
print(translation)

['مرحبا بالعالم!', 'ماذا تعتقد؟']


# Full file translation

In [None]:
# Download the sample files read by the "Open files" cell below

# Source-side file (.en)
!wget https://raw.githubusercontent.com/ymoslem/Adaptive-MT-LLM/main/data/tico-19/tico-19-enes-dedup.en
# Target-side file (.es)
!wget https://raw.githubusercontent.com/ymoslem/Adaptive-MT-LLM/main/data/tico-19/tico-19-enes-dedup.es

In [None]:
# Open files

# ✳️ Change file paths and names
src_file = "tico-19-enes-dedup.en"
tgt_file = "tico-19-enes-dedup.es"


# Read both files explicitly as UTF-8: the platform's default encoding is not
# guaranteed to be UTF-8, which could garble or reject non-ASCII characters.
with open(src_file, encoding="utf-8") as src, open(tgt_file, encoding="utf-8") as tgt:
  source_sentences = [sent.strip() for sent in src]
  target_sentences = [sent.strip() for sent in tgt]

# Show the first sentence of each file as a quick check that they loaded
print(source_sentences[0])
print(target_sentences[0])

In [None]:
# Divide a long list of source sentences into smaller chunks
# to fit into an API request

def divide_chunks(l, n):
    """Yield successive slices of ``l`` containing at most ``n`` items each.

    Args:
        l: Sequence to split (anything supporting ``len`` and slicing).
        n: Maximum number of items per chunk; must be a positive integer.

    Yields:
        Consecutive slices ``l[i:i + n]``; the last one may be shorter.

    Raises:
        ValueError: On first iteration, if ``n`` is not positive. A negative
            step would otherwise make ``range`` empty and silently produce
            no chunks at all.
    """
    if n <= 0:
        raise ValueError("chunk size n must be a positive integer, got {!r}".format(n))
    # Walk over the start index of every chunk.
    for i in range(0, len(l), n):
        yield l[i:i + n]

In [None]:
# ✳️ Change the target language code
tgt_lang = "es"  # Spanish

# Translate the source sentences batch by batch, keeping the original order
translations = []
for batch in divide_chunks(source_sentences, 100):
  translations.extend(translate_multiple_sentences(tgt_lang, batch))

In [None]:
# Print how many translations were collected
print(len(translations))

In [None]:
# Show the first 5 translations, one per line
print("\n".join(translations[:5]))

In [None]:
# Save the translation file

# ✳️ Change file path and name
translations_file = "tico-19-enes-google.es"

# Save translations, one per line. Write-only mode is enough ("w+" would also
# open the file for reading, which is never used), and UTF-8 is set explicitly
# so non-ASCII output does not depend on the platform's default encoding.
with open(translations_file, "w", encoding="utf-8") as output:
  for sentence in translations:
    output.write(sentence.strip() + "\n")

print("Translation file saved at:", translations_file)