-
Notifications
You must be signed in to change notification settings - Fork 8
/
translator.py
153 lines (117 loc) · 4.58 KB
/
translator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from urllib.error import URLError
from .jsonrpc import JSONRPCBuilder
# Endpoint that every JSON-RPC request of this module is posted to.
POST_URL = "https://www2.deepl.com/jsonrpc"

# Pseudo language code: when it is chosen as source language, no source
# language is put into the request parameters.
AUTO_LANG = "AUTO"

# Language codes accepted as translation target.
TARGET_LANGS = [
    "EN",
    "DE",
    "FR",
    "ES",
    "IT",
    "NL",
    "PL",
    "PT",
    "RU",
]

# Language codes accepted as translation source: every target language plus
# the AUTO_LANG marker.
SOURCE_LANGS = list(TARGET_LANGS)
SOURCE_LANGS.append(AUTO_LANG)

# Maximum number of characters a single sentence may have when the length
# check of the translator is enabled.
LENGTH_LIMIT = 5000
class Translator():
    """Translate text between a fixed pair of languages.

    All requests are sent through the module-level ``_send_jsonrpc`` helper.
    """

    def __init__(self, src_lang, dst_lang, check_length_limit=True):
        """The Translator class.

        :param src_lang: The source language. Matched case-insensitively
                         against ``SOURCE_LANGS``.
        :param dst_lang: The output language. Matched case-insensitively
                         against ``TARGET_LANGS``.
        :param check_length_limit: whether to check strings for length or not.
                                   Default is ``True``.
        :raises ValueError: If one of the languages is not supported.
        """
        self.src_lang = src_lang.upper()
        self.dst_lang = dst_lang.upper()

        if self.src_lang not in SOURCE_LANGS:
            raise ValueError("Input language not supported.")
        if self.dst_lang not in TARGET_LANGS:
            raise ValueError("Output language not supported.")

        self.check_length_limit = check_length_limit

    def split_into_sentences(self, text):
        """
        Split a string into sentences using the DeepL API.

        Surrounding whitespace is stripped before the text is sent. If
        nothing is left after stripping (``None``, empty or whitespace-only
        input), an empty list is returned without issuing a request.

        :param text: A string to be split.
        :returns: A list of sentences with type string.
        :raises TranslationError: If there was an exception during the
                                  translation.
        """
        stripped = text.strip() if text else ""
        if not stripped:
            return []

        method = "LMT_split_into_sentences"
        params = {
            "texts": [stripped],
            "lang": {}
        }
        # Only announce a source language if the user picked a concrete one.
        if self.src_lang != AUTO_LANG:
            params["lang"]["lang_user_selected"] = self.src_lang

        resp = _send_jsonrpc(method, params)
        # Exactly one text was sent, so only the first entry is of interest.
        return resp["splitted_texts"][0]

    def translate_sentences(self, sentences):
        """
        Translate a list of single sentences or string of sentences into a list
        of translations. If a string was passed, it will be split into a list
        of sentences using the DeepL API first.

        :param sentences: A list of strings or string to be translated.
        :returns: A list with one entry per sentence: the translated string,
                  or an ``EmptyTranslation`` instance if the response holds
                  no translation for that sentence.
        :raises LengthLimitExceeded: If the length of a string exceeds the
                                     length limit of the DeepL API, an
                                     exception is raised.
        :raises TranslationError: If there was an exception during the
                                  translation.
        """
        # catch None, empty string and empty list
        if not sentences:
            return []

        # isinstance (instead of an exact type comparison) makes sure that
        # subclasses of str are split as well rather than being iterated
        # character by character.
        if isinstance(sentences, str):
            sentences = self.split_into_sentences(sentences)
            # Nothing to translate, e.g. for a whitespace-only string.
            if not sentences:
                return []

        method = "LMT_handle_jobs"
        params = {
            "jobs": self._build_jobs(sentences),
            "lang": {
                "target_lang": self.dst_lang
            }
        }
        # Only announce a source language if the user picked a concrete one.
        if self.src_lang != AUTO_LANG:
            params["lang"]["source_lang"] = self.src_lang

        resp = _send_jsonrpc(method, params)
        return [self._extract_translation(t) for t in resp["translations"]]

    def translate_sentence(self, sentence):
        """
        Translate a single sentence. Be aware that translation might be
        incorrect if a string with multiple sentences is passed. If unsure,
        use ``translate_sentences`` or split the string via
        ``split_into_sentences`` first.

        :param sentence: A string to be translated.
        :returns: The translated string. An empty string is returned for
                  empty input.
        :raises LengthLimitExceeded: If the length of the string exceeds the
                                     length limit of the DeepL API, an
                                     exception is raised.
        :raises TranslationError: If there was an exception during the
                                  translation.
        """
        if not sentence:
            return ""

        # Wrap the sentence in a list so that it is not split beforehand.
        return self.translate_sentences([sentence])[0]

    @staticmethod
    def _extract_translation(translation):
        """Return the first beam's sentence of a single translation result.

        :param translation: One entry of the ``"translations"`` list of a
                            response.
        :returns: The ``"postprocessed_sentence"`` of the first beam, or an
                  ``EmptyTranslation`` instance if there are no beams.
        """
        beams = translation["beams"]
        if beams:
            return beams[0]["postprocessed_sentence"]
        return EmptyTranslation()

    def _build_jobs(self, sentences):
        """Build the list of job objects for a translation request.

        :param sentences: An iterable of strings, one per job.
        :returns: A list with one job dictionary per sentence.
        :raises LengthLimitExceeded: If the length check is enabled and a
                                     sentence is longer than
                                     ``LENGTH_LIMIT``.
        """
        jobs = []
        for s in sentences:
            if self.check_length_limit and len(s) > LENGTH_LIMIT:
                raise LengthLimitExceeded()
            jobs.append({"kind": "default", "raw_en_sentence": s})
        return jobs
def _send_jsonrpc(method, params):
    """Send a JSON-RPC request to ``POST_URL`` and return the result.

    :param method: Name of the remote method to call.
    :param params: The parameters handed to ``JSONRPCBuilder`` together with
                   the method name.
    :returns: Whatever ``JSONRPCBuilder.send`` returns.
    :raises TranslationError: If a ``URLError`` occurred; its ``reason`` is
                              passed on and the original error is kept as
                              ``__cause__``.
    """
    try:
        rpc = JSONRPCBuilder(method, params)
        return rpc.send(POST_URL)
    except URLError as e:
        # Chain explicitly so the traceback shows the URLError as direct
        # cause of the TranslationError.
        raise TranslationError(e.reason) from e
class LengthLimitExceeded(Exception):
    """Raised when a sentence is longer than the allowed length limit."""
class TranslationError(Exception):
    """Raised when a translation request fails.

    :param reason: The cause of the failure. This is not necessarily a
                   string: the ``reason`` of a ``URLError``, for example,
                   may be a message string or another exception instance.
    """

    def __init__(self, reason):
        # Hand the reason over to Exception so that ``str(error)`` and
        # ``error.args`` carry it instead of being empty.
        super().__init__(reason)
        self.reason = reason

    def __repr__(self):
        # Convert explicitly: concatenating a non-string reason (e.g. an
        # exception instance) to a str would raise a TypeError.
        return "TranslationError: " + str(self.reason)
class EmptyTranslation:
    """Placeholder object standing in for a translation that is missing."""

    def __repr__(self):
        placeholder = "<EmptyTranslation>"
        return placeholder