Skip to content
This repository has been archived by the owner on Nov 10, 2022. It is now read-only.

Commit

Permalink
Add support for sitelinks, closes #17
Browse files Browse the repository at this point in the history
Former-commit-id: e3469f2
  • Loading branch information
wetneb committed Oct 25, 2019
1 parent 3e7cc82 commit 5b44715
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 6 deletions.
13 changes: 12 additions & 1 deletion tests/propertypath.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_parse(self):
'P14/(P131/P17|P17)',
'P131/(P17|.)',
'P17/Len',
'P17/Sfrwiki',
'(Len|Afi)',
'P4424_P518',
'P17/qid',
Expand Down Expand Up @@ -84,7 +85,11 @@ def test_resolve_property_path(self):
self.resolve('P1449', 'Q1249148'),
[
MonolingualValue(text='Dick',language='en'),
MonolingualValue(text='Rich',language='en')])
MonolingualValue(text='Rich',language='en'),
MonolingualValue(text='Rik',language='en'),
MonolingualValue(text='Richey',language='en'),
MonolingualValue(text='Rick',language='en'),
MonolingualValue(text='Ritchie',language='en')])

self.assertEqual(
self.resolve('P17', 'Q83259'),
Expand Down Expand Up @@ -134,6 +139,12 @@ def test_resolve_property_path(self):
[IdentifierValue(value='France')]
)

# With sitelink
self.assertEqual(
self.resolve('P17/Sfrwiki', 'Q83259'),
[IdentifierValue(value='France')]
)

def value_types(self, path, qid):
    """
    Return the set of distinct `value_type` attributes among the values
    obtained from `self.resolve(path, qid)`, leaving out every value
    whose `is_novalue()` is true.
    """
    seen_types = set()
    for resolved in self.resolve(path, qid):
        # "No value" results do not contribute a type.
        if resolved.is_novalue():
            continue
        seen_types.add(resolved.value_type)
    return seen_types
Expand Down
2 changes: 1 addition & 1 deletion tests/requests_cache.sqlite.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9c0a699df6b096c98d093406dadf0623625769e1
d018da6296f65d7b7fbc42815d3f18facc20230b
3 changes: 1 addition & 2 deletions tests/suggest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def test_sparql(self):

def test_sparql_not_first_for_pid(self):
results = self.results('property', 'P17', lang='en')
print(results)
self.assertEqual(results[0]['name'], 'country')

def test_qid_property(self):
Expand Down Expand Up @@ -110,7 +109,7 @@ def test_propose_property(self):
[p['name'] for p in self.propose('Q3918', lang='fr', limit=50)])

def test_flyout(self):
self.assertTrue('British author and humorist' in
self.assertTrue('humorist' in
self.s.flyout({'id':'Q42','lang':'en'})['html'])


Expand Down
8 changes: 7 additions & 1 deletion wdreconcile/itemstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _fetch_items(self, qids):
r = requests.get(mediawiki_api_endpoint,
{'action':'wbgetentities',
'format':'json',
'props':'aliases|labels|descriptions|claims',
'props':'aliases|labels|descriptions|claims|sitelinks',
'ids':'|'.join(first_batch)},
headers={'User-Agent':user_agent})
r.raise_for_status()
Expand Down Expand Up @@ -152,6 +152,12 @@ def minify_item(self, item):
# Add datatype for properties
simplified['datatype'] = item.get('datatype')

# Add sitelinks
simplified['sitelinks'] = {
key : obj.get('title')
for key, obj in (item.get('sitelinks') or {}).items()
}

return simplified


Expand Down
37 changes: 37 additions & 0 deletions wdreconcile/propertypath.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
('DOT', (r'\.',)),
('PID', (r'P\d+',)),
('TERM', (r'[LDA][a-z\-]+',)),
('SITELINK', (r'S[a-z\-]+',)),
('QID', (r'qid',)),
('SLASH', (r'/',)),
('PIPE', (r'\|',)),
Expand Down Expand Up @@ -62,6 +63,7 @@ def __init__(self, item_store):
(t('PID') >> self.make_leaf) |
(t('QID') >> self.make_qid) |
(t('TERM') >> self.make_term) |
(t('SITELINK') >> self.make_sitelink) |
(t('DOT') >> self.make_empty) |
(st('LBRA') + pipe_path + st('RBRA'))
)
Expand Down Expand Up @@ -105,6 +107,9 @@ def make_qualifier(self, pids):
def make_term(self, term):
    """
    Build a TermPath node from a parsed token: the first character of
    the token's value is passed as one argument and the remaining
    characters as the next.
    """
    kind = term.value[0]
    remainder = term.value[1:]
    return TermPath(self, kind, remainder)

def make_sitelink(self, sitelink):
    """
    Build a SitelinkPath node from a parsed token, dropping the first
    character of the token's value and passing the rest as the site.
    """
    site = sitelink.value[1:]
    return SitelinkPath(self, site)

def make_slash(self, lst):
    """
    Build a ConcatenatedPropertyPath from the first two elements of `lst`.
    """
    first = lst[0]
    second = lst[1]
    return ConcatenatedPropertyPath(self, first, second)

Expand Down Expand Up @@ -508,6 +513,38 @@ def expected_types(self):
def readable_name(self, lang):
    """
    Return `self.term_type` immediately followed by `self.lang`.
    The `lang` argument is not used here.
    """
    prefix = self.term_type
    return prefix + self.lang

class SitelinkPath(PropertyPath):
    """
    A property path node which reads the sitelink of an item
    for one given site.
    """

    def __init__(self, factory, site):
        """
        :param factory: forwarded unchanged to the parent constructor
        :param site: key looked up in the item's 'sitelinks' mapping
        """
        super(SitelinkPath, self).__init__(factory)
        self.site = site

    def step(self, v, referenced='any', rank='any'):
        """
        Return a one-element list holding an IdentifierValue for the
        sitelink stored under `self.site` on the item designated by `v`.

        An empty list is returned when `v` is not of type
        'wikibase-item', when the item lookup yields nothing, or when
        the item has no (non-empty) sitelink for this site.
        `referenced` and `rank` are accepted but not used.
        """
        if v.value_type == 'wikibase-item':
            item = self.get_item(v)
            if item:
                # 'sitelinks' may be absent or None: treat both as empty.
                per_site = item.get('sitelinks') or {}
                target = per_site.get(self.site)
                if target:
                    return [IdentifierValue(value=target)]
        return []

    def __str__(self, add_prefix=False):
        """
        Textual form of this node: 'S' followed by the site.
        `add_prefix` is accepted but not used.
        """
        return 'S' + self.site

    def uniform_depth(self):
        """
        Always raises ValueError for this kind of node.
        """
        raise ValueError('One property is not an identifier')

    def expected_types(self):
        """
        Always an empty list for this kind of node.
        """
        return []

    def readable_name(self, lang):
        """
        Return 'Sitelink ' followed by the site; `lang` is not used.
        """
        return 'Sitelink ' + self.site

class ConcatenatedPropertyPath(PropertyPath):
"""
Expand Down
2 changes: 1 addition & 1 deletion wdreconcile/suggest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def autodescribe(qid, lang):
class SuggestEngine(object):
def __init__(self, redis_client):
self.r = redis_client
self.property_path_re = re.compile(r'(SPARQL ?:? ?)?(\(*(P\d+|[LAD][a-z\-]+)[/\|@].*)$')
self.property_path_re = re.compile(r'(SPARQL ?:? ?)?(\(*(P\d+|[LADS][a-z\-]+)[/\|@].*)$')
self.pid_re = re.compile('^P[1-9][0-9]*$')
self.store = ItemStore(self.r)
self.ft = PropertyFactory(self.store)
Expand Down

0 comments on commit 5b44715

Please sign in to comment.