In [35]:
%config IPCompleter.greedy=True
import json
import requests
from tqdm import tqdm_notebook as tqdm

In [2]:
def json_read(filename):
    """Load and return the JSON document stored in ``filename``.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform default encoding, so non-ASCII content (e.g. Cyrillic text)
    is read the same way on every OS/locale.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The deserialized Python object.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as inf:
        return json.load(inf)

def json_dump(obj, filename, ea=False, indent=4):
    """Serialize ``obj`` as JSON into ``filename``.

    The file is written as UTF-8 explicitly: with ``ea=False`` non-ASCII
    characters are emitted verbatim, which would fail or be mis-encoded if
    the platform default encoding were not UTF-8.

    Args:
        obj: JSON-serializable object.
        filename: destination path (overwritten if it exists).
        ea: forwarded to ``json.dump`` as ``ensure_ascii``.
        indent: forwarded to ``json.dump`` as ``indent``.
    """
    with open(filename, 'w', encoding='utf-8') as ouf:
        json.dump(obj, ouf, ensure_ascii=ea, indent=indent)

### Evaluate wd-aqua

In [117]:
# Load the question dataset plus its companion files from the
# dataset-compilation folder (paths are relative to this notebook).
dataset = json_read('../dataset compilation/kbqa_russian_dataset.json')
# uid lists are wrapped in sets; test_uids is used for membership tests below.
test_uids = set(json_read('../dataset compilation/test_uids.json'))
# NOTE(review): dev_uids is not referenced by any cell visible in this notebook.
dev_uids = set(json_read('../dataset compilation/dev_uids.json'))
# Tags that get their own counter bucket in the statistics cell.
tagset = set(json_read('../dataset compilation/tagset.json'))

In [37]:
# Collects every SPARQL variable name seen in the service responses.
variables = set()

In [38]:
# uid -> list of result bindings returned by the QA service.
wdaqua_answers = {}

#### Get answers

In [76]:
# Query the QA endpoint once per dataset question and cache the returned
# SPARQL bindings by uid.  The loop is resumable: uids that already have
# cached bindings are skipped, so re-running the cell only fetches what is
# still missing.
#
# `answers` is bound explicitly to the cache dict so the cell also works on a
# fresh kernel (previously the name only existed as leftover kernel state).
answers = wdaqua_answers
for entry in tqdm(dataset):
    uid = entry['uid']
    if uid in answers:
        continue
    q = entry['question_eng']
    try:
        http_resp = requests.post(
            'http://qanswer-core1.univ-st-etienne.fr/api/gerbil',
            data={
                'query': q,
                'lang': 'en',
                'kb': 'wikidata'
            },
            # Without a timeout a stalled connection would hang the loop forever.
            timeout=60,
        )
        http_resp.raise_for_status()
        payload = http_resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP failure or a non-JSON body: report it and move on;
        # the uid stays uncached and is retried on the next run of this cell.
        print(f'uid {uid}: request failed ({exc})')
        continue

    # No (or an empty) `questions` list means the service produced nothing usable.
    if 'questions' not in payload or not payload['questions']:
        continue

    raw_answers = payload['questions'][0]['question']['answers']
    if not raw_answers:
        continue

    # The `answers` field is itself a JSON-encoded SPARQL result document.
    parsed = json.loads(raw_answers)

    answers[uid] = parsed['results']['bindings']

    vs = parsed['head']['vars']
    # Multi-variable results are printed for manual inspection, because the
    # evaluation cells only look at the first variable of each binding.
    if len(vs) > 1:
        print(q)
        print(parsed)
        print()
    variables.update(vs)

HBox(children=(IntProgress(value=0, max=1500), HTML(value='')))

In [81]:
# Expose the fetched bindings under the name the evaluation cells read.
wdaqua_answers = answers

#### Check answers (usual)

In [126]:
# uid -> verdict string; filled by the evaluation loop in the following cell.
validation = {}

In [127]:
# Strict evaluation of the service output against the gold answers.
# Verdict per uid:
#   'wrong'     - no stored output for the uid, or at least one predicted
#                 answer is not among the gold answers
#   'not found' - the service returned no (non-empty) bindings
#   'correct'   - every prediction is gold and every gold answer was predicted
#   'contains'  - every prediction is gold, but some gold answers are missing
#
# The gold list is kept in `gold_answers` so the global `answers` cache used
# by the fetch cell is not overwritten by this loop.
for entry in tqdm(dataset):
    uid = entry['uid']
    gold_answers = entry['answers']
    if uid not in wdaqua_answers:
        validation[uid] = 'wrong'
        continue

    # Only the first variable of each binding is compared; empty bindings are
    # skipped instead of raising IndexError.
    predicted = [
        next(iter(binding.values()))
        for binding in wdaqua_answers[uid]
        if binding
    ]
    if not predicted:
        validation[uid] = 'not found'
        continue

    matched = [False] * len(gold_answers)
    has_spurious = False
    for pred in predicted:
        for i, gold in enumerate(gold_answers):
            if pred['type'] == gold['type'] and pred['value'] == gold['value']:
                matched[i] = True
                break
        else:
            # The prediction matched none of the gold answers.
            has_spurious = True

    if has_spurious:
        validation[uid] = 'wrong'
    elif all(matched):
        validation[uid] = 'correct'
    else:
        validation[uid] = 'contains'
        

HBox(children=(IntProgress(value=0, max=1500), HTML(value='')))

In [128]:
# Persist the strict verdicts (JSON serialization turns the uid keys into strings).
json_dump(validation, 'validation_results.json')


#### Check answers (p@1)

In [148]:
# Precision@1 evaluation: only the first returned answer is judged.
# Verdict per uid:
#   'wrong'     - no stored output for the uid, or the top answer is not gold
#   'not found' - the service returned no (non-empty) bindings
#   'correct'   - the top answer equals one of the gold answers
#
# The gold list is kept in `gold_answers` so the global `answers` cache used
# by the fetch cell is not overwritten by this loop.
validation = {}
for entry in tqdm(dataset):
    uid = entry['uid']
    gold_answers = entry['answers']
    if uid not in wdaqua_answers:
        validation[uid] = 'wrong'
        continue

    # Only the first variable of each binding is considered; empty bindings
    # are skipped instead of raising IndexError.
    predicted = [
        next(iter(binding.values()))
        for binding in wdaqua_answers[uid]
        if binding
    ]
    if not predicted:
        validation[uid] = 'not found'
        continue

    top = predicted[0]
    is_hit = any(
        top['type'] == gold['type'] and top['value'] == gold['value']
        for gold in gold_answers
    )
    validation[uid] = 'correct' if is_hit else 'wrong'

HBox(children=(IntProgress(value=0, max=1500), HTML(value='')))

In [150]:
# Persist the p@1 verdicts (JSON serialization turns the uid keys into strings).
json_dump(validation, 'validation_results_p1.json')

#### Count statistics

In [151]:
# Reload the p@1 verdicts from disk; after the JSON round-trip the keys are
# strings, which is why the statistics cell indexes with str(uid).
validation = json_read('validation_results_p1.json')

In [152]:
# test set answerable

# Verdict labels counted per tag; the order fixes the key order in the JSON dump.
verdict_labels = ('correct', 'contains', 'not found', 'wrong', 'manual')

# One zeroed counter dict per tag, plus an aggregate 'overall' bucket.
tagwise_results = {tag: dict.fromkeys(verdict_labels, 0) for tag in tagset}
tagwise_results['overall'] = dict.fromkeys(verdict_labels, 0)

# NOTE(review): the first 1200 entries are presumably the answerable
# questions (the non-answerable cell reads dataset[1200:]) - confirm.
for entry in dataset[:1200]:
    uid = entry['uid']
    if uid in test_uids:
        tags = entry['tags'] + ['overall']
        er = validation[str(uid)]
        for tag in tags:
            tagwise_results[tag][er] += 1
        # Show correctly answered questions that carry more than one real tag.
        if er == 'correct' and len(tags) > 2:
            print(tags)
json_dump(tagwise_results, 'tagwise_results_on_test_p1.json')

['exclusion', '1-hop', 'overall']
['exclusion', '1-hop', 'overall']
['reverse', '1-hop', 'overall']
['reverse', '1-hop', 'overall']
['qualifier-answer', 'multi-constraint', 'overall']
['reverse', '1-hop', 'overall']
['count', '1-hop', 'overall']
['count', '1-hop', 'overall']


In [130]:
# test set non-answerable

# Verdict counts over the test-set entries of dataset[1200:].
# NOTE(review): only 'not found' and 'wrong' are expected here; any other
# verdict raises KeyError - presumably these entries have no gold answers, confirm.
na_results = {
    'not found': 0,
    'wrong': 0
}

for entry in dataset[1200:]:
    uid = entry['uid']
    if uid not in test_uids:
        continue
    # `validation` may be the in-memory dict (int keys) or one reloaded from
    # JSON (str keys); accept both so the cell works when run top to bottom.
    er = validation[uid] if uid in validation else validation[str(uid)]
    na_results[er] += 1
json_dump(na_results, 'na_results_on_test.json')

In [153]:
# Reload both verdict files (string keys after the JSON round-trip):
# strict verdicts into `validation`, p@1 verdicts into `validation_p1`.
validation = json_read('validation_results.json')
validation_p1 = json_read('validation_results_p1.json')

In [155]:
# check recall

# tm ("too many"): the top answer is correct, but the strict check marked the
#                  question 'wrong'.
# gj ("good job"): questions with more than one gold answer that the strict
#                  check marked 'correct'.
tm = 0
gj = 0
for entry in tqdm(dataset[:1200]):
    uid = entry['uid']
    # Kept under its own name so the global `answers` cache used by the fetch
    # cell is not overwritten by this loop.
    gold_answers = entry['answers']
    if uid not in wdaqua_answers:
        continue

    strict_verdict = validation[str(uid)]

    if validation_p1[str(uid)] == 'correct' and strict_verdict == 'wrong':
        tm += 1

    if len(gold_answers) > 1 and strict_verdict == 'correct':
        gj += 1

print(f'Too many: {tm}')
print(f'Good job: {gj}')

HBox(children=(IntProgress(value=0, max=1200), HTML(value='')))

Too many: 8
Good job: 23


In [101]:
# Spot-check the stored bindings for uid 3149.
wdaqua_answers[3149]

[{'year': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
   'type': 'literal',
   'value': '1899'}}]

In [112]:
# Display the whole uid -> verdict mapping (long output).
validation

{0: 'correct',
 1: 'wrong',
 2: 'correct',
 3: 'correct',
 4: 'correct',
 5: 'correct',
 6: 'wrong',
 7: 'wrong',
 8: 'wrong',
 9: 'wrong',
 10: 'correct',
 11: 'wrong',
 12: 'wrong',
 13: 'correct',
 14: 'wrong',
 15: 'wrong',
 16: 'wrong',
 17: 'correct',
 18: 'wrong',
 19: 'correct',
 20: 'wrong',
 21: 'wrong',
 22: 'correct',
 23: 'wrong',
 24: 'wrong',
 25: 'wrong',
 26: 'wrong',
 27: 'wrong',
 28: 'wrong',
 29: 'wrong',
 30: 'correct',
 31: 'wrong',
 32: 'wrong',
 33: 'wrong',
 34: 'wrong',
 36: 'wrong',
 37: 'wrong',
 38: 'wrong',
 39: 'wrong',
 40: 'correct',
 41: 'wrong',
 42: 'wrong',
 43: 'wrong',
 44: 'wrong',
 45: 'wrong',
 46: 'wrong',
 47: 'wrong',
 48: 'wrong',
 49: 'correct',
 50: 'correct',
 51: 'correct',
 52: 'wrong',
 53: 'wrong',
 54: 'wrong',
 55: 'correct',
 56: 'correct',
 57: 'wrong',
 58: 'correct',
 59: 'wrong',
 60: 'wrong',
 61: 'wrong',
 62: 'wrong',
 63: 'wrong',
 65: 'wrong',
 66: 'correct',
 67: 'wrong',
 68: 'correct',
 69: 'wrong',
 70: 'wrong',
 71:

In [141]:
# Send one hand-picked question to the QA endpoint to inspect the raw reply.
resp = requests.post(
    'http://qanswer-core1.univ-st-etienne.fr/api/gerbil',
    data={
        'query': 'What document, signed on 7 February 1992, marked the beginning of the European Union?',
        'lang': 'en',
        'kb': 'wikidata'
    },
    # Without a timeout a stalled connection would block the cell indefinitely.
    timeout=60,
)

In [142]:
# Decoded JSON body of the response held in `resp`.
resp.json()

{'questions': [{'question': {'answers': None,
    'language': [{'SPARQL': 'SELECT ?s1  WHERE {FILTER ( ?o10 = ). \n\t ?s1  ?p1  <http://www.wikidata.org/entity/Q49848> . \n\t ?s1  ?p2  <http://www.wikidata.org/entity/Q458> . \n\t ?s1  <http://www.wikidata.org/prop/direct/P580>  ?o10 . \n}    LIMIT 1000',
      'confidence': 0.5984673041672385}]}}]}

In [68]:
# uid of the dataset entry at list position 1034.
dataset[1034]['uid']

3010

In [77]:
# Number of entries in `answers`.
# NOTE(review): assumes `answers` is still the uid -> bindings dict filled by
# the fetch loop; the evaluation cells rebind this name - confirm before re-running.
len(answers)

1491

In [72]:
# English question text of the dataset entry at list position 1035.
dataset[1035]['question_eng']

"What was Odysseus ' father's name?"

In [32]:
# Raw `questions` field of the response held in `resp`.
resp.json()['questions']

[{'question': {'answers': '{\n  "head" : {\n    "vars" : [\n      "o1"\n    ]\n  },\n  "results" : {\n    "bindings" : [\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q164761"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q165704"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q170737"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q178108"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q180935"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n          "value" : "http://www.wikidata.org/entity/Q182570"\n        }\n      },\n      {\n        "o1" : {\n          "type" : "uri",\n  

In [33]:
# The `answers` field is a JSON-encoded string; decode it to see the
# head/vars and results/bindings structure.
json.loads(resp.json()['questions'][0]['question']['answers'])

{'head': {'vars': ['o1']},
 'results': {'bindings': [{'o1': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q164761'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q165704'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q170737'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q178108'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q180935'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q182570'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q185027'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q191691'}},
   {'o1': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q334734'}},
   {'o1': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q537769'}}]}}