Skip to content

Commit

Permalink
Clean up unused files and code
Browse files Browse the repository at this point in the history
  • Loading branch information
wetneb committed Apr 14, 2019
1 parent da663b1 commit 52b8479
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 166 deletions.
1 change: 0 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from opentapioca.languagemodel import BOWLanguageModel
from opentapioca.tagger import Tagger
from opentapioca.classifier import SimpleTagClassifier
from opentapioca.goldstandard import GoldStandardDataset

# Module-level logger, named after the importing module via __name__.
logger = logging.getLogger(__name__)
# Configure the root logger: INFO threshold, and a line format made of the
# timestamp, the logger name (left-aligned, padded to 12 characters), the
# level name (left-aligned, padded to 8 characters) and the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
Expand Down
83 changes: 0 additions & 83 deletions opentapioca/goldstandard.py

This file was deleted.

33 changes: 0 additions & 33 deletions opentapioca/run_crossvalidation.py

This file was deleted.

49 changes: 0 additions & 49 deletions opentapioca/wikidatagraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,53 +167,4 @@ def get_pagerank(self, qid):
else:
return 0.01/self.pagerank.shape[1]

def compute_similarity(self, qida, qidb, steps=3, beta=0.5, explain=False):
    """
    Compute the similarity between two qids.

    The leading character of each qid is dropped and the remainder is
    parsed as an integer (e.g. ``'Q42'`` gives ``42``), which is passed to
    ``self._get_neighbour_vector``. The similarity is the dot product of
    the two resulting row vectors.

    :param qida: first qid, as a string such as ``'Q42'``
    :param qidb: second qid, same format
    :param steps: forwarded to ``_get_neighbour_vector``
    :param beta: forwarded to ``_get_neighbour_vector``
    :param explain: when true, print both neighbour vectors and then the
        URL of the column with the largest positive contribution to the
        dot product; nothing is printed for that last part when no column
        contributes positively
    :returns: the single entry of the 1x1 dot product
    :raises ValueError: if the part of a qid after its first character
        is not an integer
    """
    va = self._get_neighbour_vector(int(qida[1:]), steps, beta)
    vb = self._get_neighbour_vector(int(qidb[1:]), steps, beta)
    dp = va.dot(vb.transpose())
    if explain:
        self._print_neighbours(va)
        self._print_neighbours(vb)
        # Element-wise product: entry j is column j's share of the dot product.
        prod = va.multiply(vb)
        # None (rather than 0) marks "no contributing column found", so that
        # we never print a misleading link to Q0 when the vectors are disjoint.
        best_id = None
        best_score = 0
        for col in prod.nonzero()[1]:
            score = prod[0, col]
            if score > best_score:
                best_score = score
                best_id = col
        if best_id is not None:
            print('https://www.wikidata.org/wiki/Q%d' % best_id)
    return dp[0, 0]

def _get_neighbour_vector(self, id, steps, beta):
    """
    Returns the neighbour vector after a few iterations of the matrix.

    Starting from the indicator row vector of column ``id``, each
    iteration multiplies the current vector by ``self.mat``, rescales the
    result so that its entries sum to ``1 - beta``, and adds ``beta``
    times the starting vector.

    :param id: column index of the starting item (must be smaller than
        ``self.mat.shape[0]``)
    :param steps: maximum number of iterations
    :param beta: weight given back to the starting vector at every step
    :returns: a 1 x N sparse row vector, N being ``self.mat.shape[0]``.
        If a multiplication yields a vector summing to zero, the vector
        from the previous iteration is returned as is.
    """
    N = self.mat.shape[0]
    # Seed with a float weight: an integer seed combined with an
    # integer-typed matrix would make the rescaling below operate on
    # integer data.
    initial_v = sparse.csr_matrix(([1.0], ([0], [id])), shape=(1, N))
    v = initial_v
    for _ in range(steps):
        nv = v.dot(self.mat)

        # loss compensation and recurrence
        l1norm = nv.sum()
        if l1norm == 0:
            # Nothing reachable from here: keep the last usable vector.
            return v
        # Out-of-place arithmetic so the result dtype is always promoted.
        v = nv * ((1. / l1norm) * (1 - beta)) + beta * initial_v

    return v

def _print_neighbours(self, neighbour_vector):
    """
    Print one line per non-zero entry of a sparse row vector.

    Each line holds the entry's value and the Wikidata URL built from its
    column index, separated by a tab.

    :param neighbour_vector: a sparse matrix with a single row, such as
        the ones returned by ``_get_neighbour_vector``
    """
    for col in neighbour_vector.nonzero()[1]:
        weight = neighbour_vector[0, col]
        print('\t'.join([str(weight), 'https://www.wikidata.org/wiki/Q%d' % col]))


0 comments on commit 52b8479

Please sign in to comment.