-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from vTuanpham/feat/Providers
Feat/providers
- Loading branch information
Showing
9 changed files
with
225 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .base_provider import Provider | ||
from .google_provider import GoogleProvider | ||
from .multiple_providers import MultipleProviders |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from typing import Union, List, Any | ||
from abc import ABC, abstractmethod | ||
from types import SimpleNamespace | ||
|
||
|
||
class Provider(ABC):
    """
    Abstract base class that every translation provider must inherit from.

    To implement a provider, subclass this class, assign the backend object to
    ``self.translator`` in ``__init__`` and implement ``_do_translate``.
    Callers use ``translate``, which validates the input and normalises the
    result so that every returned item exposes a ``text`` attribute.
    """

    @abstractmethod
    def __init__(self):
        self.translator = None

    @abstractmethod
    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str], Any]:
        """
        Perform the actual translation; must be overridden by subclasses.

        :param input_data: A string or a list of strings to translate
        :param src: The source language of input_data
        :param dest: The language input_data should be translated into
        :return: The translated string, a list of them, or provider-specific
                 objects (``translate`` wraps anything lacking a 'text' attribute)
        :raises NotImplementedError: Always, when reached through ``super()``
        """
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception meant for unimplemented methods.
        raise NotImplementedError(" The function _do_translate has not been implemented.")

    def translate(self, input_data: Union[str, List[str]], src: str, dest: str) -> Union[SimpleNamespace, List[SimpleNamespace]]:
        """
        Translate text input_data from a language to another language

        :param input_data: The input_data (Can be string or list of strings)
        :param src: The source lang of input_data
        :param dest: The target lang you want input_data to be translated
        :return: SimpleNamespace object or list of SimpleNamespace objects with 'text' attribute
                 (objects returned by the provider that already have a 'text'
                 attribute are passed through unchanged)
        :raises TypeError: If input_data is neither a str nor a list of str
        :raises AssertionError: If the subclass did not assign self.translator
        """
        # Type check for input_data
        if not isinstance(input_data, (str, list)):
            raise TypeError(f"input_data must be of type str or List[str], not {type(input_data).__name__}")

        if isinstance(input_data, list) and not all(isinstance(item, str) for item in input_data):
            raise TypeError("All elements of input_data list must be of type str")

        # Ensure the translator is set. An explicit raise (rather than `assert`)
        # keeps the check alive under `python -O`, and getattr covers subclasses
        # whose __init__ never created the attribute at all.
        if not getattr(self, "translator", None):
            raise AssertionError("Please assign the translator object instance to self.translator")

        # Perform the translation
        translated_instance = self._do_translate(input_data, src=src, dest=dest)

        # Lists are normalised item by item
        if isinstance(translated_instance, list):
            return [item if hasattr(item, 'text') else SimpleNamespace(text=item) for item in translated_instance]

        # A single result that already exposes 'text' is returned as is
        if hasattr(translated_instance, 'text'):
            return translated_instance

        return SimpleNamespace(text=translated_instance)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import sys | ||
from typing import Union, List, Any | ||
sys.path.insert(0, r'/') | ||
from googletrans import Translator | ||
from .base_provider import Provider | ||
|
||
|
||
# https://github.com/ssut/py-googletrans | ||
# This is the most reliable provider, as it has access to API calls instead of using the crawling method
class GoogleProvider(Provider):
    """Provider that delegates translation to a googletrans ``Translator``."""

    def __init__(self):
        self.translator = Translator()

    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str], Any]:
        """
        Forward the request to ``Translator.translate(text, dest='en', src='auto', **kwargs)``.

        Per the googletrans documentation:

        :param input_data: The source text(s) to be translated (UTF-8 str or a
                           string sequence). Batch translation is supported via
                           sequence input.
        :param src: The language of the source text. Should be one of the
                    language codes listed in googletrans.LANGUAGES or one of the
                    language names listed in googletrans.LANGCODES.
        :param dest: The language to translate the source text into. Same
                     accepted values as ``src``.
        :param kwargs: Accepted for signature compatibility; not forwarded.
        :return: A ``Translated`` object, or a list of them when a list is passed
        """
        google_client = self.translator
        outcome = google_client.translate(input_data, src=src, dest=dest)
        return outcome
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: translate one English word to Vietnamese and print it.
    provider = GoogleProvider()
    outcome = provider.translate("Hello", src="en", dest="vi")
    print(outcome.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import sys | ||
sys.path.insert(0, r'/') | ||
from typing import Union, List | ||
import translators as ts | ||
from .base_provider import Provider | ||
|
||
|
||
# https://github.com/UlionTse/translators | ||
# This library is not as reliable a provider as googletrans; use it if you want to try out other translation services
class MultipleProviders(Provider):
    """
    Provider backed by the ``translators`` library (imported as ``ts``).

    ``self.config`` holds the keyword arguments sent to ``translate_text`` on
    every call: the translation service to use, the timeout and whether empty
    queries are ignored.
    """

    def __init__(self, cache: bool = False):
        """
        :param cache: When True, call ``preaccelerate_and_speedtest()`` up front.
                      Optional; caches sessions in advance, which can help
                      improve access speed.
        """
        self.translator = ts
        self.config = {
            "translator": "baidu",
            "timeout": 10.0,
            "if_ignore_empty_query": True
        }
        if cache:
            self.translator.preaccelerate_and_speedtest()

    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str]]:
        """
        Translate a string, or each string of a list, through ``translate_text``.

        Extra keyword arguments are merged over ``self.config`` for this call
        only, matching the ``**kwargs`` declared by the base class signature.

        Upstream signature, for reference:
        translate_text(query_text: str, translator: str = 'bing', from_language: str = 'auto', to_language: str = 'en', **kwargs) -> Union[str, dict]
        :param query_text: str, must.
        :param translator: str, default 'bing'.
        :param from_language: str, default 'auto'.
        :param to_language: str, default 'en'.
        :param if_use_preacceleration: bool, default False.
        :param **kwargs:
                :param is_detail_result: bool, default False.
                :param professional_field: str, default None. Support alibaba(), baidu(), caiyun(), cloudTranslation(), elia(), sysTran(), youdao(), volcEngine() only.
                :param timeout: float, default None.
                :param proxies: dict, default None.
                :param sleep_seconds: float, default 0.
                :param update_session_after_freq: int, default 1000.
                :param update_session_after_seconds: float, default 1500.
                :param if_use_cn_host: bool, default False. Support google(), bing() only.
                :param reset_host_url: str, default None. Support google(), yandex() only.
                :param if_check_reset_host_url: bool, default True. Support google(), yandex() only.
                :param if_ignore_empty_query: bool, default False.
                :param limit_of_length: int, default 20000.
                :param if_ignore_limit_of_length: bool, default False.
                :param if_show_time_stat: bool, default False.
                :param show_time_stat_precision: int, default 2.
                :param if_print_warning: bool, default True.
                :param lingvanex_mode: str, default 'B2C', choose from ("B2C", "B2B").
                :param myMemory_mode: str, default "web", choose from ("web", "api").
        :return: str or dict for a single input, a list of them for a list input
        """
        # Per-call overrides win over the instance-wide defaults
        options = {**self.config, **kwargs}

        def translate_one(text: str):
            return self.translator.translate_text(text, from_language=src, to_language=dest, **options)

        # This provider does not support batch translation, so lists are
        # translated one item at a time
        if isinstance(input_data, list):
            return [translate_one(text) for text in input_data]

        return translate_one(input_data)
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: translate one English word to Vietnamese and print it.
    provider = MultipleProviders()
    outcome = provider.translate("Hello", src="en", dest="vi")
    print(outcome.text)
|
||
"""
Supported languages:
['ach', 'afr', 'aka', 'alb', 'amh', 'ara', 'arg', 'arm', 'arq', 'asm', 'ast', 'auto', 'aym', 'aze', 'bak', 'bal', 'baq', 'bel', 'bem', 'ben', 'ber', 'bho', 'bis', 'bli',
'bos', 'bre', 'bul', 'bur', 'cat', 'ceb', 'chr', 'cht', 'chv', 'cor', 'cos', 'cre', 'cri', 'cs', 'dan', 'de', 'div', 'el', 'en', 'eno', 'epo', 'est', 'fao', 'fil', 'fin', 'fra', 'fri', 'frm', 'frn', 'fry', 'ful', 'geo', 'gla', 'gle',
'glg', 'glv', 'gra', 'grn', 'guj', 'hak', 'hau', 'haw', 'heb', 'hi', 'hil', 'hkm', 'hmn', 'hrv', 'ht', 'hu', 'hup', 'ibo', 'ice', 'id', 'ido', 'iku', 'ina', 'ing', 'it', 'jav', 'jp', 'kab', 'kah', 'kal', 'kan', 'kas', 'kau', 'kin',
'kir', 'kli', 'kok', 'kon', 'kor', 'kur', 'lag', 'lao', 'lat', 'lav', 'lim', 'lin', 'lit', 'log', 'loj', 'los', 'ltz', 'lug', 'mac', 'mah', 'mai', 'mal', 'mao', 'mar', 'mau', 'may', 'mg', 'mlt', 'mot', 'nbl', 'nea', 'nep', 'nl', 'nno',
'nob', 'nor', 'nqo', 'nya', 'oci', 'oji', 'ori', 'orm', 'oss', 'pam', 'pan', 'pap', 'ped', 'per', 'pl', 'pot', 'pt', 'pus', 'que', 'ro', 'roh', 'rom', 'ru', 'ruy', 'san', 'sco', 'sec', 'sha', 'sil', 'sin', 'sk', 'slo', 'sm', 'sme', 'sna',
'snd', 'sol', 'som', 'sot', 'spa', 'src', 'srd', 'srp', 'sun', 'swa', 'swe', 'syr', 'tam', 'tat', 'tel', 'tet', 'tgk', 'tgl', 'th', 'tir', 'tr', 'tso', 'tua', 'tuk', 'twi', 'ukr', 'ups', 'urd', 'ven', 'vie', 'wel', 'wln', 'wol', 'wyw',
'xho', 'yid', 'yor', 'yue', 'zaz', 'zh', 'zul']
"""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
googletrans==3.1.0a0 | ||
translators | ||
datasets | ||
tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters