From d3edc10a4baf5a0bcb21121f02f94505cb94ec74 Mon Sep 17 00:00:00 2001 From: Chiu-Hsiang Hsu Date: Wed, 25 Jan 2017 01:32:16 +0800 Subject: [PATCH 1/3] [#108] add Taiwanese Moe Dict --- zdict/dictionaries/moe.py | 100 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/zdict/dictionaries/moe.py b/zdict/dictionaries/moe.py index 1d6ee310..a909ba2a 100644 --- a/zdict/dictionaries/moe.py +++ b/zdict/dictionaries/moe.py @@ -1,4 +1,5 @@ import json +import unicodedata # to detect Unicode category from zdict.dictionary import DictBase from zdict.exceptions import QueryError, NotFoundError @@ -96,3 +97,102 @@ def query(self, word: str): ) return record + + +def is_other_format(char): + return unicodedata.category(char) != 'Cf' + + +def remove_cf(data): + return ''.join(filter(is_other_format, data)) + + +def clean(data, clean_cf=False): + ''' + Clean the word segmentation + + remove "`~" and things in Unicode 'Cf' category + ''' + data = data.translate(str.maketrans('', '', '`~')) + if clean_cf: + return remove_cf(data) + else: + return data + + +class MoeDictTaiwanese(DictBase): + + API = 'https://www.moedict.tw/t/{word}.json' + + @property + def provider(self): + return 'moe-taiwanese' + + @property + def title(self): + return '萌典(臺)' + + def _get_url(self, word) -> str: + return self.API.format(word=word) + + def show(self, record: Record): + content = json.loads(record.content) + + # print word + self.color.print(clean(content.get('t', '')), 'yellow') + + for word in content.get('h', ''): + + # print pronounce + for key, display in ( + # TODO: where is bopomofo ? + ('T', '臺羅拼音'), # Tailo + ): + self.color.print(display, end='') + self.color.print( + '[' + word.get(key, '') + ']', + 'lwhite', + end=' ', + ) + + print() + print() + + # print explain + for count, explain in enumerate(word.get('d', '')): + + self.color.print('{order}. '.format(order=count+1), end='') + type = clean(explain.get('type', '')) + if type: + self.color.print( + '[' + type + ']', + 'lgreen', + end=' ', + ) + + self.color.print(clean(explain.get('f', '')), end='') + + for example in explain.get('e', ''): + self.color.print( + clean(example, True), + 'indigo', + indent=2, + ) + + print() + + print() + + def query(self, word: str): + try: + content = self._get_raw(word) + except QueryError as exception: + raise NotFoundError(exception.word) + + record = Record( + word=word, + content=content, + source=self.provider, + ) + + return record From e5725dd6b4b1a96a4a26e8605172377daf174260 Mon Sep 17 00:00:00 2001 From: Chiu-Hsiang Hsu Date: Wed, 25 Jan 2017 12:39:34 +0800 Subject: [PATCH 2/3] Add missing test for Taiwanese Moe Dict --- zdict/tests/dictionaries/test_moe.py | 49 +++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/zdict/tests/dictionaries/test_moe.py b/zdict/tests/dictionaries/test_moe.py index d174cb96..cdff62aa 100644 --- a/zdict/tests/dictionaries/test_moe.py +++ b/zdict/tests/dictionaries/test_moe.py @@ -1,7 +1,7 @@ from pytest import raises from unittest.mock import Mock, patch -from zdict.dictionaries.moe import MoeDict +from zdict.dictionaries.moe import MoeDict, MoeDictTaiwanese from zdict.exceptions import NotFoundError, QueryError from zdict.models import Record from zdict.zdict import get_args @@ -60,3 +60,50 @@ def test_show(self): # god bless this method, hope that it do not raise any exception self.dict.show(r) + + +class TestMoeDictTaiwanese: + def setup_method(self, method): + self.dict = MoeDictTaiwanese(get_args()) + + def teardown_method(self, method): + del self.dict + + def test__get_url(self): + assert 'https://www.moedict.tw/t/木耳.json' == self.dict._get_url('木耳') + + def test_provider(self): + assert self.dict.provider == 'moe-taiwanese' + + def test_query_timeout(self): + self.dict._get_raw = Mock(side_effect=QueryError('木耳', 404)) + + with raises(NotFoundError): + self.dict.query('木耳') + + self.dict._get_raw.assert_called_with('木耳') + + @patch('zdict.dictionaries.moe.Record') + def test_query_normal(self, Record): + self.dict._get_raw = Mock(return_value='{}') + self.dict.query('木耳') + Record.assert_called_with(word='木耳', content='{}', source='moe-taiwanese') + + def test_show(self): + content = ''' + { + "h": [{ + "T": "bo̍k-ní", + "_": "928", + "d": [{ + "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~,`成~`片~`狀~,`一邊~`黏~`在~`腐~`木~`上~,`表面~`向~`上~`突出~,菌`絲~`體~`生長~`後~,`生~`子~`實體~,`形狀~`長~`得~`像~`人~`的~`耳~朵,`徑~`大約~`一~`公~`寸~,`內面~`平~`滑~,`呈現~`暗~褐`色~,`外面~`有~`柔軟~`的~`短~`毛~,`呈~`淡~褐`色~。`可以~`供~`食用~。", + "type": "`名~" + }] + }], + "t": "`木~`耳~" + } + ''' + r = Record(word='木耳', content=content, source=self.dict.provider) + + # god bless this method, hope that it do not raise any exception + self.dict.show(r) From 512c8ebaebed0a378e394ba72dcecb8727deecbb Mon Sep 17 00:00:00 2001 From: Chiu-Hsiang Hsu Date: Wed, 25 Jan 2017 12:54:04 +0800 Subject: [PATCH 3/3] Make flake8 happy --- zdict/tests/dictionaries/test_moe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zdict/tests/dictionaries/test_moe.py b/zdict/tests/dictionaries/test_moe.py index cdff62aa..39e96966 100644 --- a/zdict/tests/dictionaries/test_moe.py +++ b/zdict/tests/dictionaries/test_moe.py @@ -87,7 +87,9 @@ def test_query_timeout(self): def test_query_normal(self, Record): self.dict._get_raw = Mock(return_value='{}') self.dict.query('木耳') - Record.assert_called_with(word='木耳', content='{}', source='moe-taiwanese') + Record.assert_called_with(word='木耳', + content='{}', + source='moe-taiwanese') def test_show(self): content = ''' @@ -96,7 +98,7 @@ def test_show(self): "T": "bo̍k-ní", "_": "928", "d": [{ - "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~,`成~`片~`狀~,`一邊~`黏~`在~`腐~`木~`上~,`表面~`向~`上~`突出~,菌`絲~`體~`生長~`後~,`生~`子~`實體~,`形狀~`長~`得~`像~`人~`的~`耳~朵,`徑~`大約~`一~`公~`寸~,`內面~`平~`滑~,`呈現~`暗~褐`色~,`外面~`有~`柔軟~`的~`短~`毛~,`呈~`淡~褐`色~。`可以~`供~`食用~。", + "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~ ...", "type": "`名~" }] }],