Skip to content

Commit

Permalink
fixed some bugs to make the analogy test more robust
Browse files (browse the repository at this point in the history)
  • Loading branch information
libofang committed Apr 6, 2018
1 parent d64d67f commit 977e0b8
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions vsmlib/benchmarks/analogy/analogy.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -141,7 +141,15 @@ def gen_vec_single(pairs):
# a_prime=[i for sublist in a_prime for i in sublist]
a_prime = [i for i in a_prime if m.vocabulary.get_id(i) >= 0]
a = [i for i in a if m.vocabulary.get_id(i) >= 0]
noise = [random.choice(m.vocabulary.lst_words) for i in range(len(a))]

l = len(a)
if l == 0:
l = 1
noise = [random.choice(m.vocabulary.lst_words) for i in range(l)]

if len(a_prime) == 0:
a_prime.append(random.choice(m.vocabulary.lst_words))

x = list(a_prime) + list(a) + list(a) + list(a) + list(a) + noise
X = np.array([m.get_row(i) for i in x])
Y = np.hstack([np.ones(len(a_prime)), np.zeros(len(x) - len(a_prime))])
Expand Down Expand Up @@ -453,6 +461,7 @@ def __call__(self, p_train, p_test):
cache_size=1000,
class_weight='balanced',
probability=True)
# print(Y_train)
model_regression.fit(X_train, Y_train)
score_reg = model_regression.predict_proba(m.matrix)[:, 1]
for p_test_one in p_test:
Expand Down Expand Up @@ -657,9 +666,11 @@ def get_pairs(fname):
try:
id_line += 1
if "\t" in line:
left, right = line.lower().split("\t")
s = line.lower().split("\t")
else:
left, right = line.lower().split()
s = line.lower().split()
left = s[0]
right = s[1]
right = right.strip()
if "/" in right:
right = [i.strip() for i in right.split("/")]
Expand Down

0 comments on commit 977e0b8

Please sign in to comment.