This repository has been archived by the owner on Jul 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 50
/
spanish.py
151 lines (118 loc) · 4.67 KB
/
spanish.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import json
from bs4 import BeautifulSoup
from zdict.dictionary import DictBase
from zdict.exceptions import NotFoundError
from zdict.models import Record
# [TODO]
#
# * let the user choose the direction (en <-> spanish)
# * some words' webpages use different CSS classes ... (e.g. yo)
# * make the code much more readable
class SpanishDict(DictBase):
    '''
    Dictionary that scrapes translations from spanishdict.com.

    Tested words : ('soy', 'manzana', 'python', 'perdón')
    '''

    API = 'http://www.spanishdict.com/translate/{word}'

    # CSS classes looked up in the scraped page.  Where two patterns are
    # listed, the first one is tried and the second one ("neoharrap") is
    # only used when the first matches nothing.
    _CATEGORY_PATTERNS = (
        {'class': 'dictionary-neodict-indent-1'},
        {'class': 'dictionary-neoharrap-indent-1'},
    )
    _EXPLAIN_PATTERNS = (
        {'class': 'dictionary-neodict-indent-2'},
        {'class': 'dictionary-neoharrap-indent-2'},
    )
    _ORDER_PATTERNS = (
        {'class': 'dictionary-neodict-translation'},
        {'class': 'dictionary-neoharrap-translation'},
    )
    _EXAMPLES_PATTERN = {'class': 'dictionary-neodict-indent-3'}
    _EXAMPLE_PATTERN = {'class': 'dictionary-neodict-example'}

    @property
    def provider(self):
        '''Identifier stored as the ``source`` of records built by `query`.'''
        return 'spanish'

    @property
    def title(self):
        '''Title of this dictionary.'''
        return 'SpanishDict'

    def _get_url(self, word) -> str:
        '''Return the translation page URL for `word`.'''
        return self.API.format(word=word)

    def show(self, record: Record):
        '''
        Print the content of `record` with colors.

        ``record.content`` is the JSON document produced by `query`::

            {
                'word': headword,
                'explains': [
                    [speech, [[context, [sentence, ...]], ...]],
                    ...
                ]
            }

        where each sentence is either ``[index]`` or
        ``[index, spanish, english]``.
        '''
        content = json.loads(record.content)

        self.color.print(content['word'], 'yellow')

        # a record without explanations prints the headword only
        for (speech, categories) in content.get('explains') or []:
            self.color.print(speech, 'lwhite')

            for (category, sentences) in categories:
                self.color.print(category, 'lred')

                for sentence in sentences:
                    self.color.print(sentence[0], 'org', indent=2)

                    # only sentences that carry an example pair
                    # have more than the index
                    if len(sentence) > 2:
                        self.color.print(sentence[1], 'lindigo', indent=4)
                        self.color.print(sentence[2], 'indigo', indent=4)

        print()

    @staticmethod
    def _find_all_first(node, patterns):
        '''
        Return the matches under `node` of the first pattern in
        `patterns` that matches anything, or an empty list.
        '''
        for pattern in patterns:
            found = node.find_all(attrs=pattern)
            if found:
                return found
        return []

    def _parse_explain(self, explain):
        '''
        Return the sentences found in one indent-2 element as a list of
        ``(index,)`` or ``(index, spanish, english)`` tuples.
        '''
        orders = self._find_all_first(explain, self._ORDER_PATTERNS)
        if not orders:
            return []

        # e.g.
        #
        #   ['a. forgiveness', 'b. pardon (law)']
        #
        indices = [
            order.text.replace('\xa0', ' ').strip() for order in orders
        ]

        examples = explain.find_all(attrs=self._EXAMPLES_PATTERN)
        if not examples:
            return [(index,) for index in indices]

        sentences = []
        # examples and translations are paired by position
        for (example, index) in zip(examples, indices):
            pair = example.find(attrs=self._EXAMPLE_PATTERN)
            parts = tuple(pair) if pair is not None else ()

            if len(parts) > 2:
                # children 0 and 2 hold the two sentences of the example;
                # child 1 sits between them and is skipped
                sentences.append((index, parts[0].text, parts[2].text))
            else:
                # malformed or missing example: keep the translation
                # instead of failing the whole query
                sentences.append((index,))

        return sentences

    def _parse_category(self, category):
        '''
        Return ``[context, sentences]`` for one indent-1 element.

        `context` is an empty string when the element has no context tag.
        '''
        context_element = category.find(attrs={'class': 'context'})
        context = '' if context_element is None else context_element.text

        sentences = []
        for explain in self._find_all_first(category, self._EXPLAIN_PATTERNS):
            sentences.extend(self._parse_explain(explain))

        return [context, sentences]

    def query(self, word: str):
        '''
        Fetch the page of `word`, parse it and return a `Record` whose
        content is the JSON document described in `show`.

        :raises NotFoundError: when the page has no card, no dictionary
            entry or no headword element.
        '''
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")

        card = data.find('div', attrs={'class': 'card'})
        if card is None:
            # find() returns None when nothing matches; report it as a
            # missing word instead of failing with AttributeError below
            raise NotFoundError(word)

        # just get the first one
        entry = card.find(attrs={'class': 'dictionary-entry'})
        if entry is None:
            raise NotFoundError(word)

        # the headword may be either English or Spanish
        word_element = (
            card.find(attrs={'id': 'headword-en'})
            or card.find(attrs={'id': 'headword-es'})
        )
        if word_element is None:
            raise NotFoundError(word)

        speeches = card.find_all(attrs={'class': 'part_of_speech'})
        categories = self._find_all_first(entry, self._CATEGORY_PATTERNS)

        content = {
            'explains': [
                # parts of speech and categories are paired by position
                [speech.text, [self._parse_category(category)]]
                for (speech, category) in zip(speeches, categories)
            ],
            'word': word_element.text,
        }

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record