Skip to content

Commit

Permalink
Merge 6ccc5ad into 21c9c2b
Browse files Browse the repository at this point in the history
  • Loading branch information
bakarov committed May 4, 2018
2 parents 21c9c2b + 6ccc5ad commit efb2a50
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 6 deletions.
File renamed without changes.
22 changes: 18 additions & 4 deletions tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
from vecto.benchmarks.similarity import Similarity
from vecto.benchmarks.analogy import *
from vecto.embeddings import load_from_dir
from vecto.benchmarks.fetch_benchmarks import fetch_benchmarks
from os import path
from shutil import rmtree


path_similarity_dataset = "./tests/data/benchmarks/similarity/"
path_analogy_dataset = "./tests/data/benchmarks/analogy/"

class Tests(unittest.TestCase):


class Tests(unittest.TestCase):
def test_similarity(self):
embs = load_from_dir("tests/data/embeddings/text/plain_with_file_header")
similarity = Similarity()
Expand All @@ -23,17 +25,19 @@ def test_similarity(self):
print(result)

def test_analogy(self):
embs = load_from_dir("./tests/data/embeddings/text/plain_with_file_header")

embs = load_from_dir("tests/data/embeddings/text/plain_with_file_header")
analogy = LinearOffset()
result = analogy.get_result(embs, path_analogy_dataset)
print(result)

analogy = PairDistance()
result = analogy.get_result(embs, path_analogy_dataset)
print(result)

analogy = ThreeCosMul()
result = analogy.get_result(embs, path_analogy_dataset)
print(result)

analogy = ThreeCosMul2()
result = analogy.get_result(embs, path_analogy_dataset)
print(result)
Expand All @@ -47,6 +51,16 @@ def test_analogy(self):
result = analogy.get_result(embs, path_analogy_dataset)
print(result)

def test_fetcher(self):
    """Fetch the benchmarks into a scratch directory and run Similarity on them.

    Any scratch directory left by an earlier run is removed first, so
    ``fetch_benchmarks`` always starts from a non-existent target. The
    similarity benchmark is then run against the fetched English
    similarity data using the small plain-text test embeddings.
    """
    path_fetched = './tests/data/benchmarks_test'
    # Start from a clean slate so a stale checkout cannot mask a failed fetch.
    if path.isdir(path_fetched):
        rmtree(path_fetched)
    fetch_benchmarks(path_fetched)

    # Distinct local name: the module-level path_similarity_dataset points at
    # the bundled test data and must not be shadowed here.
    path_fetched_similarity = path_fetched + "/benchmarks/similarity/en/"
    # fetch_benchmarks handles GitCommandError itself, so a failed clone may
    # not raise above; fail here with a clear message instead of (presumably)
    # a less obvious error inside the benchmark run.
    self.assertTrue(
        path.isdir(path_fetched_similarity),
        "fetched benchmarks directory is missing: " + path_fetched_similarity)

    embs = load_from_dir("tests/data/embeddings/text/plain_with_file_header")
    similarity = Similarity()
    # Only checks that the run completes on the fetched data; the returned
    # result is not inspected.
    similarity.get_result(embs, path_fetched_similarity)


# big embs and dataset test
# embs = load_from_dir("/home/bofang/Documents/embeddings/negative_sampling/fair/")
# result = analogy.get_result(embs, "/home/bofang/Downloads/BATS_3.0_small")
Expand Down
2 changes: 2 additions & 0 deletions vecto/benchmarks/analogy/analogy.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ def run(self, embs, path_dataset):
results = []
for root, dirnames, filenames in os.walk(dir_tests):
for filename in fnmatch.filter(sorted(filenames), '*'):
if filename.endswith('json'):
continue
print(filename)
pairs = self.get_pairs(os.path.join(root, filename))
# print(pairs)
Expand Down
3 changes: 1 addition & 2 deletions vecto/benchmarks/fetch_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
from os import path

path_to_repo = 'https://github.com/vecto-ai/benchmarks.git'
path_to_local_dir = path.join('data', 'benchmarks')


def fetch_benchmarks():
def fetch_benchmarks(path_to_local_dir=path.join('data', 'benchmarks')):
try:
Repo.clone_from('https://github.com/vecto-ai/benchmarks.git', path_to_local_dir)
except GitCommandError:
Expand Down
2 changes: 2 additions & 0 deletions vecto/benchmarks/similarity/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def evaluate(self, embs, data):
def run(self, embs, path_dataset):
results = []
for file in os.listdir(path_dataset):
if file.endswith('json'):
continue
testset = self.read_test_set(os.path.join(path_dataset, file))

out = dict()
Expand Down

0 comments on commit efb2a50

Please sign in to comment.