Skip to content

Commit

Permalink
Add laonlp.transliterate.transliterate
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Aug 25, 2023
1 parent 652a513 commit e25676a
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 4 deletions.
3 changes: 2 additions & 1 deletion docs/api/transliterate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ Modules
-------

.. autofunction:: lao2thai_script
.. autofunction:: thai2lao_script
.. autofunction:: thai2lao_script
.. autofunction:: transliterate
15 changes: 14 additions & 1 deletion laonlp/transliterate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
"lao2thai_script",
"thai2lao_script",
"lao2thai_transliteration",
"thai2lao_transliteration"
"thai2lao_transliteration",
"transliterate"
]
from anyascii import anyascii

# Naive Lao script to Thai script transliteration.
# Data from https://github.com/google/language-resources/blob/master/lo/Laoo-Thai.txt
Expand Down Expand Up @@ -127,3 +129,14 @@ def thai2lao_script(text: str) -> str:
else:
new_text += c
return new_text

def transliterate(lao_word: str, engine: str = "anyascii") -> str:
    """
    Transliterate Lao text with the selected engine.

    :param str lao_word: Lao text
    :param str engine: transliteration engine. LaoNLP currently supports
        "anyascii" only.
    :return: the transliteration of ``lao_word`` produced by the engine.
    :rtype: str
    :raises ValueError: if ``engine`` is not a supported engine name.
    """
    # Reject unknown engines explicitly instead of silently falling back to
    # anyascii, so a typo in the engine name cannot go unnoticed.
    if engine != "anyascii":
        raise ValueError(
            f"Unsupported transliteration engine: {engine!r}. "
            "LaoNLP supports 'anyascii' only."
        )
    return anyascii(lao_word)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pythainlp>=3.0.0
huggingface_hub
gensim
gensim
anyascii>=0.3.2
5 changes: 4 additions & 1 deletion tests/test_transliterate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

import unittest
from laonlp.transliterate import lao2thai_script, thai2lao_script
from laonlp.transliterate import lao2thai_script, thai2lao_script, transliterate


class TestTransliteratePackage(unittest.TestCase):
Expand All @@ -10,3 +10,6 @@ def test_lao2thai_script(self):

# Smoke test: thai2lao_script must return a non-None result for a sample input.
def test_thai2lao_script(self):
self.assertIsNotNone(thai2lao_script("พาสาลาว"))

def test_transliterate(self):
    """transliterate() with the default engine romanizes a Lao word."""
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(transliterate("ສະຫວັນນະເຂດ"), "sahvannaekhd")

0 comments on commit e25676a

Please sign in to comment.