# conceptnetQuerier.py
import requests
import cPickle as pickle
import copy
# Commented out to prevent accidentally overwriting termMap.pickle.
# def pickle_term_map():
#     pickleFile = open('termMap.pickle', 'wb')
#     pickle.dump( termMap, pickleFile )
#     pickleFile.close()
def unpickle_term_map():
    """Load the cached term map from ``termMap.pickle`` in the working directory.

    Returns:
        The unpickled object when it loads successfully and is non-empty.
        An empty dict when the file is missing, unreadable or corrupt, or
        when the stored object is empty.
    """
    try:
        # ``with`` closes the file on every path.  The previous
        # ``finally: pickleFile.close()`` raised a NameError whenever
        # ``open`` itself failed, because the variable was never bound.
        with open("termMap.pickle", "rb") as pickle_file:
            # NOTE: unpickling can execute arbitrary code; only load a
            # termMap.pickle that this program produced itself.
            terms = pickle.load(pickle_file)
        if len(terms) > 0:
            return terms
    except Exception as e:
        # Best effort: any failure just means starting with an empty cache.
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("Exception:  %s" % (e,))
        return {}
    return {}
# Module-level cache of ConceptNet responses, loaded from termMap.pickle at import time.
# get_conceptnet_term keys it by the term followed by str(force_search).
termMap = unpickle_term_map()
# Alternative: start with an empty cache instead of loading the pickle.
# termMap = {}
def get_conceptnet_term(term, force_search = False):
    """Fetch ConceptNet 5.1 data for ``term``, memoised in ``termMap``.

    Args:
        term: Term appended verbatim to the request URL (no URL-encoding is
            applied here).
        force_search: When true, skip the direct concept lookup and query
            the ``startLemmas`` search endpoint instead.  When false, the
            search endpoint is only used if the direct lookup reports
            ``numFound == 0``.

    Returns:
        A deep copy of the decoded JSON response, so callers may mutate the
        result without altering the cached entry.
    """
    def decode(response):
        # requests < 1.0 exposes ``json`` as a property; later versions
        # expose it as a method.  Support both.
        body = response.json
        return body() if callable(body) else body

    # Key format is unchanged so entries loaded from termMap.pickle still hit.
    cache_key = term + str(force_search)
    if cache_key in termMap:
        return copy.deepcopy(termMap[cache_key])

    data = None
    if not force_search:
        # Only issue the direct lookup when its result can actually be used;
        # previously it was always requested and then thrown away.
        data = decode(requests.get('http://conceptnet5.media.mit.edu/data/5.1/c/en/' + term))
    if force_search or data['numFound'] == 0:
        data = decode(requests.get('http://conceptnet5.media.mit.edu/data/5.1/search?startLemmas=' + term))

    termMap[cache_key] = data
    return copy.deepcopy(data)
# Build and return a dict of the terms A is directly connected to.
def build_term_list(a, adata, forward = True):
    """Map each term directly connected to ``a`` onto its connecting edge.

    Edges whose ``rel`` is rejected by ``is_useful_relationship`` are skipped.
    Each kept edge has a direction letter appended to its ``rel`` in place:
    with ``forward`` true, 'B' when ``a`` equals the edge's ``endLemmas`` and
    'F' otherwise; with ``forward`` false the two letters are swapped.

    Args:
        a: The term the edges were fetched for.
        adata: Mapping with an ``'edges'`` list of edge dicts; the kept edges
            are modified in place.
        forward: Selects which direction letter goes with which edge end.

    Returns:
        Dict from the lemma at the other end of each kept edge to that edge
        (later edges overwrite earlier ones with the same key).
    """
    if forward:
        suffix_when_end, suffix_when_start = 'B', 'F'
    else:
        suffix_when_end, suffix_when_start = 'F', 'B'

    connected = {}
    for edge in adata['edges']:
        if not is_useful_relationship(edge['rel']):
            continue
        if edge['endLemmas'] == a:
            # A is the end of this edge
            other_end, suffix = edge['startLemmas'], suffix_when_end
        else:
            # A is the start of this edge
            other_end, suffix = edge['endLemmas'], suffix_when_start
        connected[other_end] = edge
        edge['rel'] = edge['rel'] + suffix
    return connected
def is_useful_relationship(relationship):
    """Return False for the '/r/TranslationOf' relation and True for any other."""
    return not (relationship == u'/r/TranslationOf')
def search_indirect_oneway(a, aterms, adata, b, bterms, bdata, matches, a_first = True):
    """Look for links from A to B through single words of A's multi-word terms.

    For every key of ``aterms`` that splits into more than one word, each
    word is checked twice:

    * if the word equals ``b``, a degree-1 match is appended;
    * if the word is a key of ``bterms``, a degree-2 match is appended.

    Args:
        a, adata, bdata: Unused; kept so existing callers keep working.
        aterms: Dict of term -> edge dict (each with a ``'rel'`` entry) for A.
        b: The target term.
        bterms: Dict of term -> edge dict for B.
        matches: List that new match dicts are appended to (modified in place).
        a_first: When true, degree-2 matches list A's relation/edge first;
            when false, B's comes first.

    Returns:
        None.  Each appended match is a dict with ``'rels'`` (tuple),
        ``'degree'`` (1 or 2) and ``'edges'`` (list of edge dicts).
    """
    for term_a, edge_a in aterms.items():
        words = term_a.split()
        if len(words) <= 1:
            continue
        for word in words:
            # check if one of those single words is B
            if word == b:
                # 'edges' is a list here too, matching every other match
                # record (it used to be a bare edge dict).
                matches.append({'rels': (edge_a['rel'],), 'degree': 1, 'edges': [edge_a]})
            # check if B is connected to one of those single words
            # (dict membership replaces a linear scan over bterms' keys)
            if word in bterms:
                edge_b = bterms[word]
                first, second = (edge_a, edge_b) if a_first else (edge_b, edge_a)
                matches.append({'rels': (first['rel'], second['rel']), 'degree': 2, 'edges': [first, second]})
def _first_shared_lemma_edge(a, b, edge_lists):
    """Return the first edge whose start or end lemmas contain both a and b, else None."""
    for edges in edge_lists:
        for edge in edges:
            # NOTE(review): if the lemma fields are plain strings (as the
            # equality test in build_term_list suggests), ``in`` is a
            # substring test, so 'cat' also matches 'category' - confirm
            # whether whole-word matching is intended.
            for field in ('startLemmas', 'endLemmas'):
                if a in edge[field] and b in edge[field]:
                    return edge
    return None


def get_relationship_simple(a, b, force_search_A = False, force_search_B = False):
    """Collect the relation paths that connect term ``a`` to term ``b``.

    Matches are gathered in this order:

    1. ``('sameLemma',)`` - at most once - when both terms occur in the same
       start or end lemma of an edge of either term;
    2. direct links (``b`` among A's connected terms, then ``a`` among B's);
    3. links through a term connected to both A and B;
    4. links through a single word of a multi-word connected term, searched
       from A's side and then from B's side.

    Args:
        a: First term.
        b: Second term.
        force_search_A: Passed to ``get_conceptnet_term`` for ``a``.
        force_search_B: Passed to ``get_conceptnet_term`` for ``b``.

    Returns:
        List of tuples of relation names, one tuple per match; two-element
        tuples list A's relation first.  Empty when nothing connects the terms.
    """
    adata = get_conceptnet_term(a, force_search_A)
    bdata = get_conceptnet_term(b, force_search_B)
    matches = []

    # build up a list of words A and B are connected to directly
    aterms = build_term_list(a, adata, True)
    bterms = build_term_list(b, bdata, False)

    # check if A and B both occur within the same start or end lemma from either word
    shared = _first_shared_lemma_edge(a, b, (adata['edges'], bdata['edges']))
    if shared is not None:
        matches.append({'rels': ('sameLemma',), 'degree': 1, 'edges': [shared]})

    # check if B is a word A is connected to directly
    # ('edges' is always a list now; direct matches used to store a bare dict)
    if b in aterms:
        matches.append({'rels': (aterms[b]['rel'],), 'degree': 1, 'edges': [aterms[b]]})
    # check if A is a word B is connected to directly
    if a in bterms:
        matches.append({'rels': (bterms[a]['rel'],), 'degree': 1, 'edges': [bterms[a]]})

    # check if A and B are connected via an intermediate term
    for term, edge_a in aterms.items():
        if term in bterms:
            edge_b = bterms[term]
            matches.append({'rels': (edge_a['rel'], edge_b['rel']), 'degree': 2, 'edges': [edge_a, edge_b]})

    # check if A and B are connected indirectly
    search_indirect_oneway(a, aterms, adata, b, bterms, bdata, matches)
    # The roles are swapped for the reverse search, so a_first must be False
    # to keep A's relation first like every other two-step match; it was
    # previously left at its default, which reported these B-first.
    search_indirect_oneway(b, bterms, bdata, a, aterms, adata, matches, False)

    return [match['rels'] for match in matches]
def get_relationship(a, b):
    """Return the relations between ``a`` and ``b``, widening the lookup as needed.

    ``get_relationship_simple`` is tried with the (force_search_A,
    force_search_B) flags set to (False, False), (False, True),
    (True, False) and (True, True), in that order; the first non-empty
    result is returned.  If every attempt comes back empty, the empty
    result of the last attempt is returned.
    """
    attempts = ((False, False), (False, True), (True, False), (True, True))
    relationships = None
    for force_a, force_b in attempts:
        relationships = get_relationship_simple(a, b, force_a, force_b)
        if len(relationships) != 0:
            break
    return relationships