Skip to content

Commit

Permalink
LaoNLP v1.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
wannaphong committed Aug 25, 2023
1 parent dea1142 commit 652a513
Show file tree
Hide file tree
Showing 18 changed files with 260 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Lao language Natural Language Processing (NLP)
- Lao to Thai script
- Thai to Lao script
- Word dictionary
- Word Vector (**New**)
- Word Vector

## Install
```
Expand Down
15 changes: 15 additions & 0 deletions laonlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from laonlp.tokenize import *
from laonlp.corpus import *
from laonlp.transliterate import *
Expand Down
15 changes: 15 additions & 0 deletions laonlp/corpus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import laonlp
import os

Expand Down
15 changes: 15 additions & 0 deletions laonlp/corpus/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
from laonlp.corpus import laonlp_path

Expand Down
16 changes: 15 additions & 1 deletion laonlp/corpus/lao_words.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# -*- coding: utf-8 -*-
import os
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List
from typing import FrozenSet
from laonlp.corpus.core import get_path_corpus
Expand Down
18 changes: 16 additions & 2 deletions laonlp/corpus/mopt_dict.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import csv
import os
from collections import defaultdict

from laonlp.corpus import laonlp_path
from collections import defaultdict
from laonlp.corpus.core import get_path_corpus
corpus_path = get_path_corpus("lao-eng-dictionary.csv")
list_data=[]
Expand Down
15 changes: 15 additions & 0 deletions laonlp/tag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from laonlp.tag.pos_tag import pos_tag

__all__ = [
Expand Down
15 changes: 15 additions & 0 deletions laonlp/tag/pos_tag.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List, Tuple
from laonlp.corpus import get_path_corpus
from pythainlp.tag import PerceptronTagger
Expand Down
15 changes: 15 additions & 0 deletions laonlp/tokenize/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List
from pythainlp.tokenize import Tokenizer
from laonlp.corpus import lao_words
Expand Down
15 changes: 15 additions & 0 deletions laonlp/translate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
__all__ = [
"word_dictionary",
]
Expand Down
17 changes: 15 additions & 2 deletions laonlp/translate/mopt_dict.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
# -*- coding: utf-8 -*-
import csv
import os
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List

from laonlp.corpus import mopt_dict
Expand Down
15 changes: 15 additions & 0 deletions laonlp/transliterate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
__all__ = [
"lao2thai_script",
"thai2lao_script",
Expand Down
15 changes: 15 additions & 0 deletions laonlp/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
__all__ = [
"lao_digit_to_arabic_digit",
"arabic_digit_to_lao_digit",
Expand Down
15 changes: 15 additions & 0 deletions laonlp/util/digitconv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
NUMBERS = "໑໒໓໔໕໖໗໘໙໐"
_arabic_numerals = "1234567890"
_pronunciation = [
Expand Down
16 changes: 16 additions & 0 deletions laonlp/util/lao.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
TONE_MARKS = "່້"+"໊໋"
_tone_mark = str.maketrans({i:None for i in TONE_MARKS})

Expand Down
15 changes: 15 additions & 0 deletions laonlp/word_vector/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from laonlp.word_vector.word2vec import Word2Vec

__all__ = [
Expand Down
15 changes: 15 additions & 0 deletions laonlp/word_vector/word2vec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List
import gensim
from huggingface_hub import hf_hub_download
Expand Down
19 changes: 17 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
"""
Copyright 2020 - 2023 Wannaphong Phatthiyaphaibun
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from setuptools import find_packages, setup

with open("README.md","r",encoding="utf-8-sig") as f:
Expand All @@ -9,7 +24,7 @@

setup(
name="LaoNLP",
version="0.7",
version="1.0",
description="Lao Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down Expand Up @@ -38,7 +53,7 @@
"Lao language",
],
classifiers=[
"Development Status :: 3 - Alpha",
"Development Status :: 5 - Production/Stable",
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
Expand Down

0 comments on commit 652a513

Please sign in to comment.