<a href="https://colab.research.google.com/github/ymoslem/Adaptive-MT-LLM/blob/main/MT/DeepL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DeepL API

Register to get your API key at: https://www.deepl.com/pro-api

In [None]:
import requests

# ✳️ List of source sentences to be translated
texts = ["Hello", "Thanks!"]

# ✳️ Change the source and target language codes
source_lang = "EN"
target_lang = "FR"  # "ES", "ZH", etc

# ✳️ Use only one URL based on whether you have a free or paid account

# If you are using a free trial
API_free_translation_endpoint = "https://api-free.deepl.com/v2/translate"
# If you have a paid account
API_translation_endpoint = "https://api.deepl.com/v2/translate"

# ✳️ Change this to your API key
API_Auth_Key = "DeepL-Auth-Key rest_of_your_key_here"


# The key is sent in the Authorization header of the request
headers = {
    'Authorization': API_Auth_Key
}

# Form fields of the request; "text" holds the whole list of sentences
data = {
    "text": texts,
    "source_lang": source_lang,
    "target_lang": target_lang,
}

# Without a timeout, requests would wait indefinitely on a stalled connection
response = requests.post(
    API_translation_endpoint,
    headers=headers,
    data=data,
    timeout=60,
)

if response.status_code == 200:
  print(response.text)
else:
  # Print the body as well: it carries the API's explanation of the failure
  print(response.status_code, response.text)

In [None]:
import json
# Parse the JSON body of the last response.
# response.json() decodes the raw bytes itself, so the result does not depend
# on the text encoding that requests guesses for response.text.
response_json = response.json()

# Keep only the translated string of each entry, in response order
translations = [output["text"] for output in response_json["translations"]]
print(translations)

# Full file translation

In [None]:
# Open files

import os

# ✳️ Change directory where the files are located
os.chdir("/content/drive/MyDrive/data/")

# ✳️ Change file names
src_file = "tico-19-enfr-dedup.en"
tgt_file = "tico-19-enfr-dedup.fr"


# Read both files with an explicit encoding so that accented characters are
# decoded the same way whatever the platform default is.
# NOTE: assumes the data files are UTF-8 encoded; adjust if they are not.
with open(src_file, encoding="utf-8") as src, \
     open(tgt_file, encoding="utf-8") as tgt:
  # One sentence per line; strip the newline and surrounding whitespace
  source_sentences = [sent.strip() for sent in src]
  target_sentences = [sent.strip() for sent in tgt]

# Show the first sentence of each file as a quick sanity check
print(source_sentences[0])
print(target_sentences[0])

In [None]:
# Divide a long list of source sentences into smaller chunks
# to fit into an API request

def divide_chunks(l, n):
  """Yield consecutive slices of `l`, each holding at most `n` items.

  Args:
    l: A sequence supporting `len()` and slicing (e.g. a list of sentences).
    n: Maximum number of items per chunk; must be positive.

  Yields:
    `l[i:i + n]` for i = 0, n, 2n, ...; the last chunk may be shorter.

  Raises:
    ValueError: If `n` is zero or negative. As this is a generator, the
      error is raised when iteration starts, not when the function is called.
  """
  # A negative step would make range() empty and silently drop all the data
  if n <= 0:
    raise ValueError("n must be a positive integer, got %r" % (n,))
  for start in range(0, len(l), n):
    yield l[start:start + n]

In [None]:
import requests
import json
from tqdm.notebook import tqdm

# ✳️ Change the source and target language codes
source_lang = "EN"
target_lang = "FR"

# The key is sent in the Authorization header of every request
headers = {
    'Authorization': API_Auth_Key
}

# Number of sentences sent per request.
# DeepL's API reference documents a maximum of 50 texts per translate request.
n = 50
# Ceiling division, so a final partial chunk is counted in the progress total
total = (len(source_sentences) + n - 1) // n

translations = []
for chunk_source_sentences in tqdm(divide_chunks(source_sentences, n), total=total):
  data = {
      "text": chunk_source_sentences,
      "source_lang": source_lang,
      "target_lang": target_lang,
  }
  # Without a timeout, one stalled request would hang the whole loop
  response = requests.post(
      API_translation_endpoint,
      headers=headers,
      data=data,
      timeout=120,
  )

  if response.status_code == 200:
    # response.json() decodes the raw bytes itself rather than relying on
    # the text encoding that requests guesses for response.text
    response_json = response.json()
    chunk_translations = [output["text"] for output in response_json["translations"]]
    translations += chunk_translations
  else:
    # Stop at the first failed request and make clear the output is partial
    print(response.status_code, response.text)
    print("Stopped after", len(translations), "of", len(source_sentences), "sentences")
    break

In [None]:
# Report how many translations were collected
translation_count = len(translations)
print(translation_count)

In [None]:
# Show the first 5 translations, one per line
print("\n".join(translations[:5]))

In [None]:
import os

# ✳️ Change directory where the files should be saved
os.chdir("/content/drive/MyDrive/data/")

# ✳️ Change file name
translations_file = "tico-19-enfr-deepl.fr"

# Save translations, one per line.
# The file is only written, so plain "w" mode is enough; the encoding is set
# explicitly so accented characters are written the same way on any platform.
with open(translations_file, "w", encoding="utf-8") as output:
  output.writelines(translation.strip() + "\n" for translation in translations)

print("Translation file saved at:", translations_file)