-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVocabList.py
100 lines (71 loc) · 2.04 KB
/
VocabList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import warnings
import os
class VocabList:
    """Merged, upper-cased vocabulary loaded from plain-text word files.

    Each vocabulary document is a text file holding one word per line.
    Words are stripped and upper-cased when loaded; blank lines and words
    longer than ``MAX_WORD_LENGTH`` characters are ignored.

    Attributes:
        vocab_list: set of every word contributed by the loaded documents.
    """

    # Upper bound on the number of documents loaded at the same time.
    MAX_DOCS = 10
    # Words with more characters than this are skipped when loading.
    MAX_WORD_LENGTH = 24

    def __init__(self):
        """Create an empty vocabulary with no documents loaded."""
        # Absolute paths of the loaded documents, in load order.
        self._doc_list = []
        # Absolute path -> list of normalised words read from that file.
        self._doc_vocab_dict = {}
        self.vocab_list = set()

    def __len__(self):
        """Return the number of distinct words in the merged vocabulary."""
        return len(self.vocab_list)

    def get_number_of_lists(self):
        """Return how many vocabulary documents are currently loaded."""
        return len(self._doc_list)

    def _normalise(self, words):
        """Return stripped, upper-cased words, dropping blank/over-long ones."""
        stripped = (word.strip() for word in words)
        return [
            word.upper()
            for word in stripped
            if word and len(word) <= self.MAX_WORD_LENGTH
        ]

    def add_vocab_doc(self, doc):
        """Load the vocabulary file ``doc`` and merge it into the vocabulary.

        Args:
            doc: path of the vocabulary file; it is made absolute first.

        Returns:
            True when the file was loaded. False (after a warning) when the
            file was already loaded, does not exist, or could not be read.

        Raises:
            MemoryError: if ``MAX_DOCS`` documents are already loaded.
        """
        doc = os.path.abspath(doc)
        if doc in self._doc_list:
            warnings.warn(
                '{} was already added to the vocabulary list.'.format(doc))
            return False
        if len(self._doc_list) >= self.MAX_DOCS:
            raise MemoryError(
                'You can load at most {} vocabulary lists.'.format(
                    self.MAX_DOCS))
        try:
            self.add_vocab_to_list(doc)
        except FileNotFoundError:
            warnings.warn('{} cannot be found.'.format(doc))
            return False
        except Exception as err:
            # The second positional argument of warnings.warn is the warning
            # category, so the error has to be formatted into the message.
            warnings.warn('Unexpected error: {!r}'.format(err))
            return False
        self._doc_list.append(doc)
        return True

    def add_vocab_to_list(self, doc):
        """Read the words of ``doc`` and merge them into ``vocab_list``.

        The document is not registered in the list of loaded documents;
        ``add_vocab_doc`` does that after this method succeeds.

        Args:
            doc: path of the vocabulary file; it is made absolute first.

        Raises:
            OSError: if the file cannot be opened (for example
                FileNotFoundError when it does not exist).
        """
        doc = os.path.abspath(doc)
        with open(doc, 'r') as vocab_f:
            words = self._normalise(vocab_f)
        # Store the entry only after a successful read so that a failed load
        # leaves no empty entry behind.
        self._doc_vocab_dict[doc] = words
        self.vocab_list |= set(words)

    def remove_vocab_doc(self, doc):
        """Unload ``doc`` and rebuild the vocabulary from what remains.

        A warning is issued for each internal record (document list, word
        dictionary) that does not contain the document.

        Args:
            doc: path of the vocabulary file; it is made absolute first.
        """
        doc = os.path.abspath(doc)
        if doc in self._doc_list:
            self._doc_list.remove(doc)
        else:
            warnings.warn(
                '{} was not in the vocabulary list.'.format(doc))
        if doc in self._doc_vocab_dict:
            del self._doc_vocab_dict[doc]
        else:
            warnings.warn(
                '{} was not in the vocabulary list.'.format(doc))
        # Words may be shared between documents, so the merged set is
        # rebuilt from the remaining documents instead of subtracting.
        self.vocab_list = set()
        for loaded in self._doc_list:
            self.vocab_list |= set(self._doc_vocab_dict[loaded])

    def add_vocab_to_doc(self, doc, new_vocab_list):
        """Write ``new_vocab_list`` to ``doc``, one stripped word per line.

        The file is created or overwritten. When ``doc`` is a loaded
        document, the new words are also normalised like loaded words and
        added to the in-memory vocabulary.

        Args:
            doc: path of the file to write.
            new_vocab_list: iterable of words (strings); blank entries are
                skipped.

        Returns:
            True on success, False (after a warning) if anything failed.
        """
        try:
            # Materialise once: the input may be a one-shot iterator and is
            # needed for both the file and the in-memory update. Doing this
            # before opening also avoids truncating the file on bad input.
            entries = [word.strip() for word in new_vocab_list]
            entries = [word for word in entries if word]
            # NOTE(review): the file is overwritten while the in-memory list
            # below is extended - confirm which behaviour is intended.
            with open(doc, 'w') as vocab_f:
                vocab_f.writelines(word + '\n' for word in entries)
            doc = os.path.abspath(doc)
            if doc in self._doc_list:
                added = self._normalise(entries)
                self._doc_vocab_dict[doc] += added
                self.vocab_list |= set(added)
        except Exception as err:
            # See add_vocab_doc: the error must be part of the message.
            warnings.warn('Unexpected error: {!r}'.format(err))
            return False
        return True