# Information Retrieval System for Persian Wikipedia 

In this project, I will implement a simple information retrieval system for a dataset consisting of Persian Wikipedia web pages. 

# 1- Data preparation and Text Operations

In [1]:
# Loading required libraries

from __future__ import unicode_literals
import re
import os
import json
import glob
import math
import heapq
import random
import numpy as np
import editdistance
from hazm import *

In [2]:
# Preparing text for use in index
# Note: Bigram index doesn't use stemming

def prepare_text(raw_text, stem_flag=True):
    prepared_text = raw_text
    
    # 1. Remove Punctuation marks
    punctuation = '[^۰-۹ آ-ی a-z 0-9 \u200c]'
    prepared_text = re.sub(punctuation, ' ', prepared_text)
    
    # 2. Normalization
    normalizer = Normalizer()
    prepared_text = normalizer.normalize(prepared_text) 
    
    # 3. Tokenization
    tokens = word_tokenize(prepared_text) 
    
    # 4. Stemming
    if stem_flag:
        stemmer = Stemmer()
        tokens = [stemmer.stem(token) for token in tokens] 
        
    return tokens

# 2- Index Construction

## 2.1- Positional Index

In [3]:
index = {}
doc_ids = []

In [4]:
# Parses an xml document and extracts id, title and text

def parse_next_doc(corpus):
    content = ''
    for line in corpus:
        content += line
        if '</page>' in line:
            break
    try:
        id = re.search('<id>(.*?)</id>', content, re.DOTALL).group(1)
        title = re.search('<title>(.*?)</title>', content, re.DOTALL).group(1) 
        text = re.search('<text(.*?)</text>', content, re.DOTALL).group(1)
    except:
        return None
    
    doc_dict = {'id': id, 'title': title, 'text': text}
    return doc_dict

In [5]:
# Adds content of a zone(title, text) for a doc to index

def add_zone_to_index(doc_dict, zone):
    doc_id = doc_dict.get('id')
    prepared_zone = prepare_text(doc_dict.get(zone), True)
    for pos, term in enumerate(prepared_zone):
        if term in index:
            try:
                index[term][doc_id][zone].append(pos)
            except:
                try:
                    index[term][doc_id][zone] = [pos]
                except:
                    index[term][doc_id] = {}
                    index[term][doc_id][zone] = [pos]    
                 
        else:
            index[term] = {}
            index[term][doc_id] = {}
            index[term][doc_id][zone] = [pos]  

In [6]:
# Building positional index

def construct_positional_indexes(docs_path):
    global index
    corpus = open(docs_path, encoding = 'utf8')
    doc_dict = parse_next_doc(corpus)
    while doc_dict:
        doc_ids.append(doc_dict.get('id'))
        add_zone_to_index(doc_dict, 'title')                
        add_zone_to_index(doc_dict, 'text')
        doc_dict = parse_next_doc(corpus)
    print("Index is build completely!")    
        
construct_positional_indexes('data/Persian.xml')

Index is build completely!


In [7]:
def get_posting_list(word):
    try:
        posting_list = index[word]
    except:
        posting_list = []
    return posting_list

## 2.2- Bigram Index

In [8]:
bigram_index = {}
bigram_dict = set()

In [9]:
# Building bigram index

def construct_bigram_index(docs_path):
    global bigram_index
    corpus = open(docs_path, encoding = 'utf8')
    doc_dict = parse_next_doc(corpus)
    while doc_dict:
        doc_id = doc_dict['id']
        words = prepare_text(doc_dict['title'], stem_flag=False) + prepare_text(doc_dict['text'], stem_flag=False)
        for word in words:
            bigram_dict.add(word)
            marked_word = '$' + word + '$'
            bigrams = [marked_word[i:i + 2] for i in range(0, len(marked_word) - 1)]
            for bigram in bigrams:
                try:
                    bigram_index[bigram].add(word)
                except:
                    bigram_index[bigram] = set()
                    bigram_index[bigram].add(word)
        doc_dict = parse_next_doc(corpus)
    print("Bigram Index is build completely!") 

construct_bigram_index('data/Persian.xml')   

Bigram Index is build completely!


In [None]:
# add this function after construct bigram index

def remove_bigram_index(docs_path):
    global bigram_index
    corpus = open(docs_path, encoding = 'utf8')
    doc_dict = parse_next_doc(corpus)
    while doc_dict:
        doc_id = doc_dict['id']
        words = prepare_text(doc_dict['title'], stem_flag=False) + prepare_text(doc_dict['text'], stem_flag=False)
        for word in words:
            bigram_dict.add(word)
            marked_word = '$' + word + '$'
            bigrams = [marked_word[i:i + 2] for i in range(0, len(marked_word) - 1)]
            for bigram in bigrams:
                #try:
                #    bigram_index[bigram].add(word)
                #except:
                #    bigram_index[bigram] = set()
                #    bigram_index[bigram].add(word)
                if not prepare_text(word)[0] in index:
                    try:
                        bigram_index[bigram].remove(word) 
                    except:
                        pass
                                     
        doc_dict = parse_next_doc(corpus)        

In [10]:
def get_words_with_bigram(bigram):
    try:
        return list(bigram_index[bigram])
    except:
        return []

get_words_with_bigram('ثر')

['ورثرغنه',
 'ثروت\u200cها',
 'ثروت\u200cهای',
 'دراکثرشهرهای',
 'منثره',
 'تأثرش',
 'یثرب',
 'مؤثر\u200cتر',
 'ثریایی',
 'اکثرأ',
 'اثرگذارترین',
 'اثرش',
 'اثربخش',
 'مفقودالاثرهای',
 'ناموثر',
 'نثری',
 'حداکثری',
 'نثر',
 'اثرگذار',
 'حداکثر',
 'اکثریتی',
 'تکثر',
 'مؤثرتر',
 'نثرنویسی',
 'اکثرا',
 'اثرهایی',
 'ثروت\u200cمندان',
 'مؤثرترین',
 'میثر',
 'ثروتمند',
 'ثروتمندان',
 'اثری',
 'اثربخشی',
 'موثرترین',
 'اثره',
 'اثر',
 'مؤثرند',
 'اثرگذاری',
 'ثروتی',
 'اثرها',
 'اکثرنفاط',
 'تأثر',
 'براثر',
 'ثروتمندترین\u200cها',
 'میثرا',
 'گوثری',
 'اثراتی',
 'ثروت',
 'کثرة',
 'موثرتری',
 'نام\u200cاثر',
 'اثرهای',
 'ثروتهای',
 'دراثر',
 'موثری',
 'کوثر',
 'اکثر',
 'ثریتونه',
 'دراکثر',
 'موثرند',
 'متأثر',
 'تأثری',
 'ثروتمندترین',
 'خشثریه',
 'اثرات',
 'بی\u200cاثر',
 'کثرت',
 'کوثری',
 'أکثرهم',
 'موثرتر',
 'متاثر',
 'مؤثر',
 'مؤثری',
 'متکثر',
 'هیثرو',
 'اثر\u200cها',
 'اکثریت',
 'میثره',
 'تکثرگرایی',
 'ثریا',
 'حدکثر',
 'پرثروت']

In [11]:
# Testing index size and index for 'Newton'

print(len(index))
print(index['نیوتون'])

93786
{'3415': {'text': [2400, 2410]}, '4058': {'text': [944]}, '4659': {'text': [1745]}, '5237': {'text': [4137]}, '6054': {'text': [1612]}, '7013': {'text': [797, 2202]}}


In [12]:
def add_document_to_indexes(docs_path, doc_num):
    file_name = docs_path + '/' + str(doc_num) + '.xml'
    global index
    doc = open(file_name, encoding = 'utf8')
    doc_dict = parse_next_doc(doc)
        
    if doc_dict:
        if doc_dict.get('id') in doc_ids:
            return
        add_zone_to_index(doc_dict, 'title')                
        add_zone_to_index(doc_dict, 'text')
        doc_ids.append(doc_dict.get('id'))

add_document_to_indexes('data/wiki', 1)

In [13]:
# Doc 650 added to newton index

print(len(index))
print(index['نیوتون'])

93951
{'3415': {'text': [2400, 2410]}, '4058': {'text': [944]}, '4659': {'text': [1745]}, '5237': {'text': [4137]}, '6054': {'text': [1612]}, '7013': {'text': [797, 2202]}, '650': {'text': [8, 34, 132, 136, 209, 2778, 3508, 3546, 3573, 3617, 3664, 3698, 3796, 3852]}}


In [14]:
def remove_zone_from_index(doc_dict, zone):
    doc_id = doc_dict.get('id')
    prepared_zone = prepare_text(doc_dict.get(zone), True)
    for term in prepared_zone:
            try:
                del index[term][doc_id]
                if not bool(index[term]):
                    del index[term]
            except:
                pass

In [15]:
def delete_document_from_indexes(docs_path, doc_num):
    file_name = docs_path + '/' + str(doc_num) + '.xml'
    global index
    doc = open(file_name, encoding = 'utf8')
    doc_dict = parse_next_doc(doc)
    
    if doc_dict:
        if doc_dict.get('id') not in doc_ids:
            return
        remove_zone_from_index(doc_dict, 'title')
        remove_zone_from_index(doc_dict, 'text')
        doc_ids.remove(doc_dict.get('id'))

delete_document_from_indexes('data/wiki', 1)

In [16]:
# Newton index back to normal

print(len(index))
print(index['نیوتون'])

93786
{'3415': {'text': [2400, 2410]}, '4058': {'text': [944]}, '4659': {'text': [1745]}, '5237': {'text': [4137]}, '6054': {'text': [1612]}, '7013': {'text': [797, 2202]}}


In [17]:
def save_index(destination):
    if not os.path.exists(destination):
        os.makedirs(destination)
    file_name = destination + '/index.json'
    with open(file_name, 'w') as json_file:
        json.dump(index, json_file)

save_index('storage/index_backup')

In [18]:
def load_index(source):
    if not os.path.exists(source):
        return "No such directory"
    file_name = source + '/index.json'
    global index
    with open(file_name) as json_file:
        index = json.load(json_file)

load_index('storage/index_backup')

In [19]:
# Testing newton again to see if index is the same...

print(len(index))
print(index['نیوتون'])

93786
{'3415': {'text': [2400, 2410]}, '4058': {'text': [944]}, '4659': {'text': [1745]}, '5237': {'text': [4137]}, '6054': {'text': [1612]}, '7013': {'text': [797, 2202]}}


# 3- Document retreival, Scoring and VSM

## 3.1- Spelling Correction

In [20]:
def jaccard_coeff(token_set1, token_set2):
    intersection = [b for b in token_set1 if b in token_set2]
    union = [b for b in token_set1 or b in token_set2]
    return  len(intersection) / len(union) 

In [21]:
# One-word (non-context sensitive) query correction based on jaccard coeeficient and edit distance

def correct_query(query):
    query_terms = prepare_text(query, stem_flag = False)
    print(query_terms)
    for term in query_terms:
        if term not in bigram_dict:
            marked_term = '$' + term + '$'
            term_bigrams = [marked_term[i:i + 2] for i in range(0, len(marked_term) - 1)]
            candidate_set = []

            for b in term_bigrams:
                candidate_set.extend(list(bigram_index[b]))
            candidate_set = list(set(candidate_set))
            
            # filtering based on jaccard coefficient
            jaccard_candidates = []
            threshold = 0.5
            for j in range(len(candidate_set)):
                marked_c = '$' + candidate_set[j] + '$'
                candidate_bigrams = [marked_c[i:i + 2] for i in range(0, len(marked_c) - 1)]
                if jaccard_coeff(candidate_bigrams, term_bigrams) > threshold:
                      jaccard_candidates.append(candidate_set[j]) 
                        
            # filtering based on edit distance  
            edit_distances = [editdistance.eval(jaccard_candidates[j], term) for j in range(len(jaccard_candidates))]
            final_candidates = [jaccard_candidates[j] for j in range(len(jaccard_candidates)) if edit_distances[j] == min(edit_distances)]
            idx = query_terms.index(term)
            print(term, final_candidates)
            query_terms[idx] = random.choice(final_candidates)
            
            
    correct_query = ' '.join(query_terms)
    return correct_query

correct_query("شلام حالا برسهان درسک شد")

['شلام', 'حالا', 'برسهان', 'درسک', 'شد']
شلام ['غلام', 'آلام', 'شلال', 'لام', 'شلقم', 'کلام', 'سلام', 'شلاق', 'شلیم', 'بلام', 'شرام', 'شلغم', 'شام']
برسهان ['برسلان', 'برسیان', 'برهان']
درسک ['درسی', 'ارسک', 'دریک', 'درسر', 'درسبک', 'دیسک', 'درک', 'دراک', 'درست', 'پرسک', 'درس', 'دسک', 'درسش']


'شلاق حالا برهان پرسک شد'

## 3.2- Phrasal (Exact) Search

In [22]:
# Phrasal search based on zone(s)

def phrasal_search(phrase, zones = ['title', 'text']):
    phrase_terms = prepare_text(phrase)
    if len(phrase_terms) == 1:
        posting_list = get_posting_list(phrase)
        if len(posting_list) > 0:
            return list(posting_list.keys())
        return []
        
    common_docs = get_posting_list(phrase_terms[0]).keys()
    relevant_docs = []
    
    for term in phrase_terms:
        posting_list = index[term]
        common_docs = list(set(common_docs) & set(index[term].keys()))
    
    for zone in zones:  
        for doc in common_docs:
            try:
                for pos in index[phrase_terms[0]][doc][zone]:
                    flag = True
                    for i in range(1, len(phrase_terms)):
                        if not (pos +  i) in index[phrase_terms[i]][doc][zone]:
                            flag = False
                            break
                    if flag and doc not in relevant_docs:
                        relevant_docs.append(doc)
                        break           
            except:
                pass
    return relevant_docs        
            
phrasal_search('نظرخواهی انجام شده توسط دانشگاه') 

['6824']

## 3.3- tf-idf Scoring and vector space model

In [23]:
# Building static tf-idf matrixes

def build_df_tf():
    tf = {}
    df = {}
    for term, posting in index.items():
        tf[term] = {}
        df[term] = {}
        df[term]['title'] = 0
        df[term]['text'] = 0
        
        for doc in posting:
            tf[term][doc] = {}
            try:
                tf[term][doc]['title'] = len(index[term][doc]['title'])
                df[term]['title'] += 1
            except:
                pass
            try:    
                tf[term][doc]['text'] = len(index[term][doc]['text'])
                df[term]['text'] += 1
            except:
                pass
    return tf, df  

In [24]:
tf = {}
df = {}
tf, df = build_df_tf()

In [25]:
# tf-idf test

word = 'نیوتون'
print(index[word])
print(len(index[word]))
print(tf[word]['3415'])
print(tf[word])
print(df[word])

{'3415': {'text': [2400, 2410]}, '4058': {'text': [944]}, '4659': {'text': [1745]}, '5237': {'text': [4137]}, '6054': {'text': [1612]}, '7013': {'text': [797, 2202]}}
6
{'text': 2}
{'3415': {'text': 2}, '4058': {'text': 1}, '4659': {'text': 1}, '5237': {'text': 1}, '6054': {'text': 1}, '7013': {'text': 2}}
{'title': 0, 'text': 6}


In [26]:
# Finding |max_retrieved| docs which are most relevant to query

def search(query, method="ltn-lnn", weight=2, max_retrieved=15):
    relevant_docs = [] 
    scores = []
    phrases = re.findall(r'"(.*?)"', query)
    non_phrase = re.sub(r'"(.*?)"', ' ', query)
    query_terms = prepare_text(query)
    non_phrase_terms = prepare_text(non_phrase)
    
    for phrase in phrases + non_phrase_terms:
        relevant_docs.extend(phrasal_search(phrase))
            
    relevant_docs = list(set(relevant_docs))
    scores = [0] * len(relevant_docs)
        
    w_q = [0] * len(query_terms)
    
    # compute query weight vector
    for i in range(len(query_terms)):
        term = query_terms[i]
        w_q[i] = math.log10(query_terms.count(term)) + 1

    # cosine normalization
    if method[2] == 'c': 
        w_q = w_q / np.linalg.norm(w_q)
    
    for doc in relevant_docs:
        w_d = [0] * len(query_terms)
        for i in range(len(query_terms)):
            term = query_terms[i]
            w_td_title, w_td_text = 0, 0
            n = len(doc_ids)
            try:
                w_td_title = (math.log10(tf[term][doc]['title']) + 1) * math.log10(n / df[term]['title'])
            except:
                pass
            try:
                w_td_text = (math.log10(tf[term][doc]['text']) + 1) * math.log10(n / df[term]['text'])
            except:
                pass
            w_d[i] = w_td_title * 2 + w_td_text
            
        doc_idx = relevant_docs.index(doc)
        # cosine normalization
        if method[2] == 'c':
            w_d = w_d / np.linalg.norm(w_d)
        scores[doc_idx] = np.dot(w_d, w_q)
    
    # find k top docs according to score with a heap
    relevant_docs = heapq.nlargest(min(len(relevant_docs), max_retrieved), zip(scores, relevant_docs))
    relevant_docs = [int(x) for (y, x) in relevant_docs]
    return relevant_docs

search('"نظرخواهی انجام شده توسط دانشگاه" شهر نیویورک', "ltc-lnc", 3)

[3197,
 5509,
 3039,
 3099,
 3747,
 6694,
 6749,
 4321,
 5293,
 6915,
 6824,
 4838,
 6772,
 3777,
 5508]

In [27]:
# Searching is done seperately in each zone

def detailed_search(title_query, text_query, method="ltn-lnn", max_retrieved=15):
    relevant_docs = []
    scores = []
    title_phrases = re.findall(r'"(.*?)"', title_query)
    text_phrases = re.findall(r'"(.*?)"', text_query)
    title_non_phrase = re.sub(r'"(.*?)"', ' ', title_query)
    text_non_phrase = re.sub(r'"(.*?)"', ' ', text_query)
    non_phrase_title_terms = prepare_text(title_non_phrase)
    non_phrase_text_terms = prepare_text(text_non_phrase)
    
    for phrase in title_phrases + non_phrase_title_terms:
        relevant_docs.extend(phrasal_search(phrase, zones=['title']))
        
    for phrase in text_phrases + non_phrase_text_terms:
        relevant_docs.extend(phrasal_search(phrase, zones=['text']))
        
    query_terms = prepare_text(title_query) + prepare_text(text_query)
            
    relevant_docs = list(set(relevant_docs))
    scores = [0] * len(relevant_docs)
    w_q = [0] * len(query_terms)
    
    # compute query weight vector
    for i in range(len(query_terms)):
        term = query_terms[i]
        w_q[i] = math.log10(query_terms.count(term)) + 1

    # cosine normalization
    if method[2] == 'c': 
        w_q = w_q / np.linalg.norm(w_q)
    
    for doc in relevant_docs:
        w_d = [0] * len(query_terms)
        for i in range(len(query_terms)):
            term = query_terms[i]
            w_td_title, w_td_text = 0, 0
            n = len(doc_ids)
            try:
                w_td_title = (math.log10(tf[term][doc]['title']) + 1) * math.log10(n / df[term]['title'])
            except:
                pass
            try:
                w_td_text = (math.log10(tf[term][doc]['text']) + 1) * math.log10(n / df[term]['text'])
            except:
                pass
            w_d[i] = w_td_title * 2 + w_td_text
            
        doc_idx = relevant_docs.index(doc)
        # cosine normalization
        if method[2] == 'c':
            w_d = w_d / np.linalg.norm(w_d)
        scores[doc_idx] = np.dot(w_d, w_q)
                  
  
    # find k top docs according to score with a heap
    relevant_docs = heapq.nlargest(min(len(relevant_docs), max_retrieved), zip(scores, relevant_docs))
    relevant_docs = [int(x) for (y, x) in relevant_docs]
    return relevant_docs

detailed_search('عجایب "هفت‌گانه"', 'چشمگیرترین بناهای تاریخی جهان', "ltc-lnc")

[3260,
 4530,
 6969,
 6752,
 5967,
 7143,
 6949,
 5293,
 3666,
 5192,
 4094,
 3938,
 3120,
 3874,
 6917]

# 4- IR System Evaluation with R-precision, F-measure, MAP and NDCG

In [28]:
# Reading test queries and their relevant documents

def read_queries(query_id='all'):
    if query_id == 'all':
        queries, relevance = [], []
        for file in glob.glob('data/queries/*.txt'):
            query = []
            with open(file, encoding = 'utf8') as query_file:
                for line in query_file:
                    query.append(line)   
            queries.append(query)  
        for file in glob.glob('data/relevance/*.txt'):
            with open(file, encoding = 'utf8') as relevance_file:
                relevance.append([int(x) for x in relevance_file.read().split(',')]) 
        return queries, relevance   
    else:
        query = []
        with open('data/queries/%s.txt'%(query_id,), encoding = 'utf8') as query_file:
            for line in query_file:
                query.append(line)
        with open('data/relevance/%s.txt'%(query_id,)) as relevance_file:   
            relevance = [int(x) for x in relevance_file.read().split(',')]
        return query, relevance 

In [29]:
# Searching for related documents

def get_related_docs(query, max_retrieved=15):
    if len(query) == 1:
        return search(query[0], "ltc-lnc", max_retrieved=max_retrieved)
    elif len(query) == 2:
        return detailed_search(query[0], query[1], "ltn-lnn")

In [30]:
# Computing the f-measure for relevant and retrieved docs

def compute_f_measure(retrieved, relevant):
    alpha = 0.5
    p = len(list(set(retrieved) & set(relevant))) / len(retrieved)
    r = len(list(set(retrieved) & set(relevant))) / len(relevant)
    beta2 = (1 - alpha) / alpha
    f_measure = (beta2 + 1) * p * r / (beta2 * p + r)
    return f_measure

In [31]:
# Computing the average precision for relevant and retrieved docs

def compute_avg_precision(retrieved, relevant):
    indices = [i for i in range(len(retrieved)) if retrieved[i] in relevant]
    p = np.zeros(len(retrieved))
    p[indices] = 1   
    p = np.divide(np.cumsum(p) * p, np.arange(1, len(retrieved) + 1))
    p = p[p > 0]
    return np.mean(p)

In [32]:
# Computing the NDCG for relevant and retrieved docs

def compute_ndcg(retrieved, relevant):
    indices = [i for i in range(len(retrieved)) if retrieved[i] in relevant]
    p = np.zeros(len(retrieved))
    p[indices] = 1 
    ideal = np.array([1] * int(np.sum(p)) + [0] * (len(p) - int(np.sum(p))))
    dcg_rf = p[0] + sum(np.divide(p[1:], np.log2(np.arange(2, len(retrieved) + 1))))
    dcg_gt = ideal[0] + sum(np.divide(ideal[1:], np.log2(np.arange(2, len(retrieved) + 1))))
    ndcg = dcg_rf / dcg_gt
    return ndcg

In [40]:
def R_Precision(query_id='all'):
    if query_id == 'all':
        r_precision = []
        queries, relevants = read_queries(query_id)
        for i in range(len(queries)):
            retrieved = get_related_docs(queries[i], max_retrieved=len(relevants[i]))
            r_precision.append(len(list(set(retrieved) & set(relevants[i]))) / len(relevants[i]))
        r_precision = np.mean(r_precision)
    else:
        query, relevant = read_queries(query_id)
        retrieved = get_related_docs(query, max_retrieved=len(relevant))
        r_precision = len(list(set(retrieved) & set(relevant))) / len(relevant)

    return r_precision


def F_measure(query_id='all'):
    if query_id == 'all':
        queries, relevants = read_queries(query_id)
        f_measures = []
        for i in range(len(queries)):
            retrieved = get_related_docs(queries[i])
            f_measures.append(compute_f_measure(retrieved, relevants[i]))
        f_measure = np.mean(f_measures)    
    else:
        query, relevant = read_queries(query_id)
        retrieved = get_related_docs(query)
        f_measure = compute_f_measure(retrieved, relevant)
    
    return f_measure
  

def MAP(query_id='all'):
    if query_id == 'all':
        queries, relevants = read_queries(query_id)
        average_p = []
        for j in range(len(queries)):
            retrieved = get_related_docs(queries[j])
            average_p.append(compute_avg_precision(retrieved, relevants[j]))            
        map_value = np.mean(average_p)    
    else:
        query, relevant = read_queries(query_id)
        retrieved = get_related_docs(query)
        map_value = compute_avg_precision(retrieved, relevant)
        
    return map_value


def NDCG(query_id='all'):
    if query_id == 'all':
        queries, relevants = read_queries(query_id)
        ndcg_values = []
        for j in range(len(queries)):
            retrieved = get_related_docs(queries[j])
            ndcg_values.append(compute_ndcg(retrieved, relevants[j]))
        ndcg_value = np.mean(ndcg_values)    
    else:
        query, relevant = read_queries(query_id)
        retrieved = get_related_docs(query)
        ndcg_value = compute_ndcg(retrieved, relevant)
        
    return ndcg_value


print(R_Precision())
print(F_measure())
print(MAP())
print(NDCG())

0.6046041053192083
0.5495403840954625
0.706255989361921
0.8119605069912865
