-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix, chore: Correct base translate Provider, fix test cases, add doc …
…string
- Loading branch information
Showing
9 changed files
with
147 additions
and
73 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from typing import Union, List, Any | ||
from abc import ABC, abstractmethod | ||
from types import SimpleNamespace | ||
|
||
|
||
class Provider(ABC):
    """
    Base Provider that must be inherited by all Provider classes.

    Implement your own provider by inheriting this class, assigning a translator
    object to ``self.translator`` in ``__init__`` and overriding ``_do_translate``.
    """

    @abstractmethod
    def __init__(self):
        # Placeholder only: translate() refuses to run while this is falsy,
        # so subclasses must assign a real translator instance.
        self.translator = None

    @abstractmethod
    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str], Any]:
        """
        Perform the actual translation; must be overridden by every subclass
        :param input_data: The input_data (Can be string or list of strings)
        :param src: The source lang of input_data
        :param dest: The target lang you want input_data to be translated
        :return: The translated result (str, list of str, or any object/list of objects)
        :raises NotImplementedError: when the base implementation is called directly
        """
        # NotImplementedError is the exception class; NotImplemented is a
        # non-callable constant and would raise a confusing TypeError here.
        raise NotImplementedError(" The function _do_translate has not been implemented.")

    def translate(self, input_data: Union[str, List[str]], src: str, dest: str) -> Union[SimpleNamespace, List[SimpleNamespace]]:
        """
        Translate text input_data from a language to another language
        :param input_data: The input_data (Can be string or list of strings)
        :param src: The source lang of input_data
        :param dest: The target lang you want input_data to be translated
        :return: SimpleNamespace object or list of SimpleNamespace objects with 'text' attribute
        :raises TypeError: if input_data is neither a str nor a list of str
        :raises AssertionError: if self.translator has not been assigned
        """

        # Type check for input_data
        if not isinstance(input_data, (str, list)):
            raise TypeError(f"input_data must be of type str or List[str], not {type(input_data).__name__}")

        if isinstance(input_data, list) and not all(isinstance(item, str) for item in input_data):
            raise TypeError("All elements of input_data list must be of type str")

        # Ensure the translator is set. Raised explicitly (instead of using an
        # `assert` statement) so the check still runs under `python -O`, while
        # keeping the AssertionError type existing callers may rely on.
        if not self.translator:
            raise AssertionError("Please assign the translator object instance to self.translator")

        # Perform the translation
        translated_instance = self._do_translate(input_data, src=src, dest=dest)

        # Wrap each item in the list in SimpleNamespace if the item doesn't have a 'text' attribute
        if isinstance(translated_instance, list):
            return [item if hasattr(item, 'text') else SimpleNamespace(text=item) for item in translated_instance]

        # Wrap non-list objects in SimpleNamespace if they don't have a 'text' attribute
        if not hasattr(translated_instance, 'text'):
            return SimpleNamespace(text=translated_instance)

        # Already exposes `.text`; hand it back untouched.
        return translated_instance
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import sys | ||
from typing import Union, List, Any | ||
sys.path.insert(0, r'/') | ||
from googletrans import Translator | ||
from .base_provider import Provider | ||
|
||
|
||
# https://github.com/ssut/py-googletrans | ||
# This is the most reliable provider, as it has access to an API call instead of using the crawling method | ||
class GoogleProvider(Provider):
    """
    Provider backed by a googletrans ``Translator`` instance.
    """

    def __init__(self):
        # One Translator object is created per provider and reused for every call.
        self.translator = Translator()

    def _do_translate(self, input_data: Union[str, List[str]], src: str, dest: str, **kwargs) -> Union[str, List[str], Any]:
        """
        Forward the request to ``self.translator.translate`` and return its result unchanged.

        Notes taken from the googletrans documentation of
        ``translate(text, dest='en', src='auto', **kwargs)``:
        batch translation is supported via sequence input, and ``src`` / ``dest``
        should be language codes listed in googletrans.LANGUAGES or language names
        listed in googletrans.LANGCODES.

        :param input_data: The source text(s) to be translated (string or list of strings)
        :param src: The language of the source text
        :param dest: The language to translate the source text into
        :param kwargs: Accepted for interface compatibility; not forwarded to the translator
        :return: Whatever the translator returns (documented by googletrans as a
                 Translated object, or a list when a list is passed)
        """
        outcome = self.translator.translate(input_data, src=src, dest=dest)
        return outcome
|
||
|
||
if __name__ == '__main__':
    # Manual smoke check: translate one English word to Vietnamese and print it.
    test = GoogleProvider()
    translated = test.translate("Hello", src="en", dest="vi")
    print(translated.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.