Skip to content

Commit

Permalink
unit test for one_column .vocab file
Browse files Browse the repository at this point in the history
  • Loading branch information
libofang committed Apr 27, 2018
1 parent cbb8f48 commit dec3786
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 17 deletions.
3 changes: 3 additions & 0 deletions tests/test_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

path_text_file = "./tests/data/corpora/plain/sense_small.txt"
path_vocab = "./tests/data/vocabs/plain"
path_vocab = "./tests/data/vocabs/one_column"


def run_module(name: str, args, run_name: str = '__main__') -> None:
Expand Down Expand Up @@ -50,6 +51,8 @@ def test_load_from_dir(self):
vocab = Vocabulary()
vocab.load(path_vocab)
print("the:", vocab.get_id("the"))
vocab.load(path_vocab)
print("the:", vocab.get_id("the"))

def test_tokens_to_ids(self):
vocab = Vocabulary()
Expand Down
34 changes: 17 additions & 17 deletions vecto/vocabulary/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,23 @@ def save_to_dir(self, path):
f.close()
save_json(self.metadata, os.path.join(path, "metadata.json"))

# def load_list_from_sorted_file(self, filename):
# self.lst_words = []
# f = open(filename, encoding='utf-8', errors='replace')
# lines = f.readlines()
# for line in lines:
# token = line.strip()
# self.lst_words.append(token)
# f.close()

# def create_dic_from_list(self):
# self.dic_words_ids = {}
# for i in range(len(self.lst_words)):
# self.dic_words_ids[self.lst_words[i]] = i

# def load_from_list(self, path):
# self.load_list_from_sorted_file(path)
# self.create_dic_from_list()
def load_list_from_sorted_file(self, filename):
    """Read a one-token-per-line text file into ``self.lst_words``.

    Each line is stripped of surrounding whitespace and appended in file
    order, so a token's position in the list equals its (0-based) line
    number. No sorting or de-duplication is performed here; blank lines
    are kept as empty-string entries so that positions stay aligned with
    the file.

    Args:
        filename: Path to the text file. It is decoded as UTF-8, with
            undecodable bytes replaced rather than raising.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Reset first so a failed open leaves an empty list, not stale data.
    self.lst_words = []
    # The context manager guarantees the handle is closed even if reading
    # fails part-way; iterating the file avoids loading every line at once.
    with open(filename, encoding='utf-8', errors='replace') as f:
        self.lst_words.extend(line.strip() for line in f)

def create_dic_from_list(self):
    """Rebuild ``self.dic_words_ids`` from ``self.lst_words``.

    Maps every token in ``self.lst_words`` to its 0-based position in
    that list. Any previous mapping is discarded. If a token occurs more
    than once, the index of its last occurrence is kept.
    """
    self.dic_words_ids = {
        word: idx for idx, word in enumerate(self.lst_words)
    }

def load_from_list(self, path):
    """Load a vocabulary from a one-token-per-line file at ``path``.

    Delegates to ``load_list_from_sorted_file`` and then to
    ``create_dic_from_list``, in that order.
    """
    # Step 1: read the tokens from the file.
    self.load_list_from_sorted_file(path)
    # Step 2: build the token-to-id lookup from what was just read.
    self.create_dic_from_list()

def load_tsv(self, path):
pos = 0
Expand Down

0 comments on commit dec3786

Please sign in to comment.