In [53]:
import pandas as pd
from statistics import mean 
import math 

In [12]:
# Load the (question id, answer/document id) relevance pairs,
# keeping only the two id columns.
qid_docid = pd.read_csv(
    "train/FiQA_train_question_doc_final.tsv",
    sep="\t",
).loc[:, ['qid', 'docid']]

# Preview the first rows
qid_docid.head(n=5)

Unnamed: 0,qid,docid
0,0,18850
1,1,14255
2,2,308938
3,3,296717
4,3,100764


In [26]:
# Create dict for query id and relevant passages
# keys: query ids, values: list of relevant passages (in file order)
#
# groupby replaces the row-by-row iterrows() loop, which builds a Series for
# every row. sort=False keeps the keys in order of first appearance (the
# insertion order the loop produced), and rows keep their original order
# within each group.
qid_rel = (
    qid_docid
    .groupby('qid', sort=False)['docid']
    .apply(list)
    .to_dict()
)

In [256]:
# How many relevant passages does each query have?
num_rel = list(map(len, qid_rel.values()))

avg_num_rel, max_num_rel, min_num_rel = mean(num_rel), max(num_rel), min(num_rel)

# Report the summary statistics, one line each
for template, value in (
    ("Average number of relevant passages for each query: {}", avg_num_rel),
    ("Max number of relevant passages for each query: {}", max_num_rel),
    ("Min number of relevant passages for each query: {}", min_num_rel),
):
    print(template.format(value))

Average number of relevant passages for each query: 2.5737063778580023
Max number of relevant passages for each query: 23
Min number of relevant passages for each query: 1


In [22]:
# Create dict for query id and ranked candidates
# key: query ids, values: list of NUM_CANDIDATES ranked candidates, where
# index i holds the candidate at rank i + 1. Slots for ranks that never
# appear in the run file keep the placeholder value 0.
NUM_CANDIDATES = 1000
qid_ranked_docs = {}

with open("fiqa-passage/run_train.tsv", 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank (e.g. trailing) lines instead of failing on int('')
            continue

        # [qid, doc_id, rank]
        fields = line.split('\t')
        qid = int(fields[0])
        doc_id = int(fields[1])
        rank = int(fields[2])

        # Ranks are 1-based. Without this check a rank of 0 would wrap around
        # to index -1 and silently overwrite the last slot.
        if not 1 <= rank <= NUM_CANDIDATES:
            raise ValueError(
                "rank {} for query {} is outside 1..{}".format(rank, qid, NUM_CANDIDATES)
            )

        if qid not in qid_ranked_docs:
            # Pre-size the list so candidates can be stored by rank in any order
            qid_ranked_docs[qid] = [0] * NUM_CANDIDATES
        qid_ranked_docs[qid][rank - 1] = doc_id

In [331]:
# Functions to compute nDCG and MRR
def get_rel_score(rel_score, cand_docs, rel_docs, k, qid=None):
    """Record binary relevance labels for the top-k candidates of one query.

    Args:
        rel_score: dict mapping query id -> list of 0/1 labels; updated in place.
        cand_docs: ranked candidate passage ids for the query, best first.
        rel_docs: passage ids labelled relevant for the query.
        k: number of top-ranked candidates to label.
        qid: id of the query being scored. When omitted, the module-level
            variable ``qid`` is used, which is what this function originally
            read implicitly. Pass it explicitly in new code.

    Returns:
        The same ``rel_score`` dict. If ``qid`` already has an entry it is
        left untouched; otherwise it receives exactly ``k`` labels, with ranks
        beyond the end of ``cand_docs`` counted as non-relevant (0).

    Raises:
        NameError: if ``qid`` is omitted and no module-level ``qid`` exists.
    """
    if qid is None:
        # Legacy call style: fall back to the caller's loop variable.
        try:
            qid = globals()["qid"]
        except KeyError:
            raise NameError("name 'qid' is not defined; pass qid explicitly") from None

    if qid not in rel_score:
        depth = max(k, 0)
        labels = [1 if doc in rel_docs else 0 for doc in cand_docs[:depth]]
        # Pad so every query has exactly k labels even with a short candidate list
        labels.extend([0] * (depth - len(labels)))
        rel_score[qid] = labels

    return rel_score

def dcg(rels, k):
    """Discounted cumulative gain of the first ``k`` relevance labels.

    Uses the formulation in which the first two ranks are undiscounted and
    the label at 1-based rank ``r >= 2`` is divided by ``log2(r)``:
    ``rels[0] + sum(rels[r - 1] / log2(r) for r in 2..k)``.

    Args:
        rels: relevance labels in ranked order, best-ranked first.
        k: cutoff; values larger than ``len(rels)`` are clamped to it.

    Returns:
        The DCG value, or 0 when ``rels`` is empty or ``k <= 0``.
    """
    depth = min(k, len(rels))
    if depth <= 0:
        return 0

    cumulated_sum = rels[0]
    for i in range(1, depth):
        # rels[i] sits at 1-based rank i + 1, hence the log2(i + 1) discount
        cumulated_sum += rels[i] / math.log(i + 1, 2)
    return cumulated_sum

def avg_ndcg(rel_score, k=None):
    """Mean nDCG over every query in ``rel_score``.

    The ideal ranking used for normalisation is the query's own stored labels
    sorted in descending order, i.e. the best possible reordering of the
    labels that were recorded, not a ranking built from all known relevant
    passages.

    Args:
        rel_score: dict mapping query id -> list of relevance labels in
            ranked order.
        k: cutoff passed to ``dcg``. Defaults to the length of each query's
            label list, so the result no longer depends on a module-level
            ``k``. Values larger than the list length are clamped.

    Returns:
        The arithmetic mean of the per-query nDCG values. Queries with no
        relevant label in range (ideal DCG of 0) contribute 0.

    Raises:
        statistics.StatisticsError: if ``rel_score`` is empty.
    """
    ndcg_list = []
    for rels in rel_score.values():
        depth = len(rels) if k is None else min(k, len(rels))
        if depth <= 0:
            # Nothing to score for this query
            ndcg_list.append(0)
            continue

        dcg_val = dcg(rels, depth)
        idcg_val = dcg(sorted(rels, reverse=True), depth)

        # An ideal DCG of 0 means no relevant label at all: define nDCG as 0
        ndcg_list.append(dcg_val / idcg_val if idcg_val else 0)

    return mean(ndcg_list)

def compute_RR(cand_docs, rel_docs, cumulated_reciprocal_rank, k):
    """Add one query's reciprocal rank to a running total.

    Scans the first ``k`` entries of ``cand_docs`` in order; the first one
    found in ``rel_docs`` contributes ``1 / rank`` (rank is 1-based). If none
    of them is relevant, the running total is returned unchanged.
    """
    position = 0
    while position < k:
        if cand_docs[position] in rel_docs:
            # Only the highest-ranked relevant candidate counts
            return cumulated_reciprocal_rank + 1 / (position + 1)
        position += 1

    return cumulated_reciprocal_rank

In [292]:
# Spot-check two queries: top-10 ranked candidates next to their relevant passages
for position, sample_qid in enumerate((6, 102)):
    if position:
        # Blank line between the two queries
        print()
    print(qid_ranked_docs[sample_qid][:10])
    print(qid_rel[sample_qid][:10])

[560251, 210300, 188530, 108734, 78139, 564488, 151506, 477720, 349669, 106319]
[560251, 188530, 564488]

[494264, 187073, 356835, 173858, 536063, 10500, 107819, 174693, 192591, 278460]
[494264, 187073]


In [332]:
k = 10
cumulated_reciprocal_rank = 0
rel_score = {}
# Not read anywhere in the visible cells; kept in case later cells rely on them.
num_rel_docs = 0
precision_list = {}

# For each query
for qid in qid_ranked_docs:
    # Only queries that have at least one labelled relevant passage are scored
    if qid not in qid_rel:
        continue

    # Relevant docs and ranked candidates for this query
    rel_docs = qid_rel[qid]
    cand_docs = qid_ranked_docs[qid]

    # The loop variable must stay named `qid`: it is not passed here, so
    # get_rel_score picks the query id up from module scope.
    rel_score = get_rel_score(rel_score, cand_docs, rel_docs, k)

    # MRR@k
    cumulated_reciprocal_rank = compute_RR(cand_docs, rel_docs, cumulated_reciprocal_rank, k)

# nDCG is averaged over the queries actually scored above, so report that count.
# Using rel_score directly also avoids a NameError when no query was scored.
num_evaluated = len(rel_score)
print("Average nDCG@{} for {} queries: {}".format(k, num_evaluated, avg_ndcg(rel_score)))
print()

# MRR is normalised by every labelled query: a labelled query that has no
# ranked candidates contributes a reciprocal rank of 0.
MRR = cumulated_reciprocal_rank/len(qid_rel)

print("MRR@{} for {} queries: {}".format(k, len(qid_rel), MRR))

Average nDCG@10 for 6648 queries: 0.36504981770986045

MRR@10 for 6648 queries: 0.3104752234828942


In [340]:
# Precision at k
# This cell uses its own names for the cutoff and loop variables so that the
# evaluation cell's `k`, the loop variable `qid` and the earlier `num_rel`
# list are not overwritten as a side effect of running it.
precision_k = 1

precision_at_k = []

for query_scores in rel_score.values():
    # rel_score only stores labels down to the depth used when it was built;
    # slicing past that would silently under-count, so fail loudly instead.
    assert len(query_scores) >= precision_k, "precision_k exceeds the stored ranking depth"
    num_hits = sum(1 for label in query_scores[:precision_k] if label == 1)
    precision_at_k.append(num_hits / precision_k)

mean_precision_at_k = mean(precision_at_k)

print(mean_precision_at_k)

0.23901925391095066
