In [1]:
! pip3 install transformers=='4.48.3'

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
! pip install beir=='2.0.0'

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
! nvidia-smi

Wed Mar 19 09:14:17 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.77                 Driver Version: 565.77         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A5000               On  |   00000000:03:00.0 Off |                  Off |
|  0%   24C    P8             26W /  230W |       2MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
import os
import pathlib
import logging
from datetime import timedelta
from typing import List, Dict, Union, Tuple

import numpy as np
import torch
from torch import Tensor
import torch.distributed as dist
from tqdm import trange
from transformers import AutoTokenizer, AutoModel
from transformers.file_utils import PaddingStrategy

from beir import util, LoggingHandler
from beir.retrieval import models
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

# Configure logging
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
logger = logging.getLogger(__name__)

def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden.shape[0]
    embedding = last_hidden[torch.arange(batch_size, device=last_hidden.device), sequence_lengths]
    return embedding

# The prompt for queries
def get_detailed_instruct_query(task_description: str, query: str) -> str:
    return f'{task_description}\nQuery: {query}'

# The prompt for passages
def get_detailed_instruct_passage(passage: str) -> str:
    return f'Represent this passage\npassage: {passage}'

class SentenceBERT:
    def __init__(self, model_path: Union[str, Tuple] = "BMRetriever/BMRetriever-410M", sep: str = " ", dataset="nfcorpus", **kwargs):
        self.sep = sep
        self.tasks = {
                        'nfcorpus': 'Given a question, retrieve relevant documents that best answer the question',
                        'scifact': 'Given a scientific claim, retrieve documents that support or refute the claim',
                        'trec-covid': 'Given a query on COVID-19, retrieve documents that answer the query',
                     }
        self.task = self.tasks[dataset]
        self.dataset = dataset
        self.model_path = model_path
        self.model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32).cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.max_length = 512
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.padding_side = "left"

    @torch.no_grad()
    def encode(self, input_texts: List[str], batch_size: int = 32, **kwargs) -> Tensor:
        embeddings = []
        self.model.eval()
        for i in trange(0, len(input_texts), batch_size):
            input_text = input_texts[i: (i+batch_size)]
            batch_dict = self.tokenizer(
                input_text,
                max_length=self.max_length-1,
                return_attention_mask=False,
                return_token_type_ids=False,
                padding=PaddingStrategy.DO_NOT_PAD,
                truncation=True
            )
            with torch.cuda.amp.autocast():
                batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
                batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt').to("cuda")
                outputs = self.model(**batch_dict)
                embedding = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
                embeddings.append(embedding)
        embeddings = torch.cat(embeddings, dim=0)
        logger.info(f"Embeddings shape: {embeddings.shape}")
        return embeddings

    def encode_queries(self, queries: List[str], batch_size: int = 32, **kwargs) -> Tensor:
        queries = [get_detailed_instruct_query(self.task, query) for query in queries]
        embeddings = self.encode(queries, batch_size=batch_size, **kwargs)
        return embeddings

    def encode_corpus(self, corpus: Union[List[Dict[str, str]], Dict[str, List]], batch_size: int = 32, **kwargs) -> Tensor:
        if isinstance(corpus, dict):
            sentences = [(corpus["title"][i] + self.sep + corpus["text"][i]).strip() if "title" in corpus else corpus["text"][i].strip() for i in range(len(corpus['text']))]
        else:
            sentences = [(doc["title"] + self.sep + doc["text"]).strip() if "title" in doc else doc["text"].strip() for doc in corpus]
        sentences = [get_detailed_instruct_passage(passage) for passage in sentences]
        embeddings = self.encode(sentences, batch_size=batch_size, **kwargs)
        return embeddings

In [6]:
bm_ret = DRES(SentenceBERT(model_path="BMRetriever/BMRetriever-410M"), dataset = 'nfcorpus', batch_size=32)
retriever_bm_ret = EvaluateRetrieval(bm_ret, score_function="dot") # or "cos_sim" for cosine similarity

In [7]:
# Download and load dataset
dataset = "nfcorpus" # dataset name
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
out_dir = "datasets"
data_path = util.download_and_unzip(url, out_dir)

In [8]:
# Load corpus, queries, and qrels
corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")

2025-03-19 09:14:55 - Loading Corpus...


  0%|          | 0/3633 [00:00<?, ?it/s]

2025-03-19 09:14:55 - Loaded 3633 TEST Documents.
2025-03-19 09:14:55 - Doc Example: {'text': 'Recent studies have suggested that statins, an established drug group in the prevention of cardiovascular mortality, could delay or prevent breast cancer recurrence but the effect on disease-specific mortality remains unclear. We evaluated risk of breast cancer death among statin users in a population-based cohort of breast cancer patients. The study cohort included all newly diagnosed breast cancer patients in Finland during 1995–2003 (31,236 cases), identified from the Finnish Cancer Registry. Information on statin use before and after the diagnosis was obtained from a national prescription database. We used the Cox proportional hazards regression method to estimate mortality among statin users with statin use as time-dependent variable. A total of 4,151 participants had used statins. During the median follow-up of 3.25 years after the diagnosis (range 0.08–9.0 years) 6,011 participants die

In [9]:
import json
# load paraphrased dataset
with open(f"{dataset}_query_paraphrased_gpt4o.json", encoding='utf-8') as f:
    # Load the JSON data into a Python dictionary
    queries_para = json.load(f)

In [10]:
queries_p = {}
for q in queries_para:
  queries_p[q] = queries_para[q]['query_p']

In [11]:
results_bm_ret = retriever_bm_ret.retrieve(corpus, queries_p)

2025-03-19 09:15:00 - Encoding Queries...


  with torch.cuda.amp.autocast():
You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 11/11 [00:01<00:00,  9.31it/s]


2025-03-19 09:15:01 - Embeddings shape: torch.Size([323, 1024])
2025-03-19 09:15:01 - Sorting Corpus by document length (Longest first)...
2025-03-19 09:15:01 - Scoring Function: Dot Product (dot)
2025-03-19 09:15:01 - Encoding Batch 1/1...


100%|██████████| 114/114 [00:20<00:00,  5.58it/s]


2025-03-19 09:15:21 - Embeddings shape: torch.Size([3633, 1024])


In [13]:
# Evaluate your model with NDCG@k, MAP@K, Recall@K and Precision@K where k = [1,3,5,10,100,1000]
ndcg, _map, recall, precision = retriever_bm_ret.evaluate(qrels, results_bm_ret, retriever_bm_ret.k_values)

2025-03-19 09:17:15 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-03-19 09:17:15 - 

2025-03-19 09:17:15 - NDCG@1: 0.3158
2025-03-19 09:17:15 - NDCG@3: 0.2855
2025-03-19 09:17:15 - NDCG@5: 0.2654
2025-03-19 09:17:15 - NDCG@10: 0.2447
2025-03-19 09:17:15 - NDCG@100: 0.2385
2025-03-19 09:17:15 - NDCG@1000: 0.3341
2025-03-19 09:17:15 - 

2025-03-19 09:17:15 - MAP@1: 0.0384
2025-03-19 09:17:15 - MAP@3: 0.0595
2025-03-19 09:17:15 - MAP@5: 0.0705
2025-03-19 09:17:15 - MAP@10: 0.0833
2025-03-19 09:17:15 - MAP@100: 0.1071
2025-03-19 09:17:15 - MAP@1000: 0.1195
2025-03-19 09:17:15 - 

2025-03-19 09:17:15 - Recall@1: 0.0384
2025-03-19 09:17:15 - Recall@3: 0.0690
2025-03-19 09:17:15 - Recall@5: 0.0867
2025-03-19 09:17:15 - Recall@10: 0.1194
2025-03-19 09:17:15 - Recall@100: 0.2609
2025-03-19 09:17:15 - Recall@1000: 0.5879
2025-03-19 09:17:15 - 

2025-03-19 09:17:15 - P@1: 0.3313
2025-03-19 09:17:15

In [14]:
print(f"Model: BMRet; Dataset: {dataset} (paraphrased)")
print("-" * 150)
print(ndcg)
print(_map)
print(recall)
print(precision)

Model: BMRet; Dataset: nfcorpus (paraphrased)
------------------------------------------------------------------------------------------------------------------------------------------------------
{'NDCG@1': 0.31579, 'NDCG@3': 0.28547, 'NDCG@5': 0.26537, 'NDCG@10': 0.24469, 'NDCG@100': 0.23854, 'NDCG@1000': 0.33409}
{'MAP@1': 0.03845, 'MAP@3': 0.0595, 'MAP@5': 0.07052, 'MAP@10': 0.08325, 'MAP@100': 0.10706, 'MAP@1000': 0.11951}
{'Recall@1': 0.03845, 'Recall@3': 0.06901, 'Recall@5': 0.08669, 'Recall@10': 0.11941, 'Recall@100': 0.26092, 'Recall@1000': 0.5879}
{'P@1': 0.33127, 'P@3': 0.26935, 'P@5': 0.22972, 'P@10': 0.18421, 'P@100': 0.06288, 'P@1000': 0.0195}


In [15]:
bm_ret = DRES(SentenceBERT(model_path="BMRetriever/BMRetriever-410M"), dataset = 'scifact', batch_size=32)
retriever_bm_ret = EvaluateRetrieval(bm_ret, score_function="dot") # or "cos_sim" for cosine similarity

In [16]:
# Download and load dataset
dataset = "scifact" # dataset name
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
out_dir = "datasets"
data_path = util.download_and_unzip(url, out_dir)

2025-03-19 09:18:37 - Downloading scifact.zip ...


datasets/scifact.zip:   0%|          | 0.00/2.69M [00:00<?, ?iB/s]

2025-03-19 09:18:37 - Unzipping scifact.zip ...


In [17]:
# Load corpus, queries, and qrels
corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")

2025-03-19 09:18:50 - Loading Corpus...


  0%|          | 0/5183 [00:00<?, ?it/s]

2025-03-19 09:18:51 - Loaded 5183 TEST Documents.
2025-03-19 09:18:51 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers

In [18]:
import json
# load paraphrased dataset
with open(f"{dataset}_query_paraphrased_gpt4o.json", encoding='utf-8') as f:
    # Load the JSON data into a Python dictionary
    queries_para = json.load(f)

In [19]:
queries_p = {}
for q in queries_para:
  queries_p[q] = queries_para[q]['query_p']

In [20]:
results_bm_ret = retriever_bm_ret.retrieve(corpus, queries_p)

2025-03-19 09:19:33 - Encoding Queries...


  with torch.cuda.amp.autocast():
You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 10/10 [00:00<00:00, 31.56it/s]


2025-03-19 09:19:34 - Embeddings shape: torch.Size([300, 1024])
2025-03-19 09:19:34 - Sorting Corpus by document length (Longest first)...
2025-03-19 09:19:34 - Scoring Function: Dot Product (dot)
2025-03-19 09:19:34 - Encoding Batch 1/1...


100%|██████████| 162/162 [00:26<00:00,  6.16it/s]


2025-03-19 09:20:00 - Embeddings shape: torch.Size([5183, 1024])


In [21]:
# Evaluate your model with NDCG@k, MAP@K, Recall@K and Precision@K where k = [1,3,5,10,100,1000]
ndcg, _map, recall, precision = retriever_bm_ret.evaluate(qrels, results_bm_ret, retriever_bm_ret.k_values)

2025-03-19 09:20:53 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-03-19 09:20:53 - 

2025-03-19 09:20:53 - NDCG@1: 0.5667
2025-03-19 09:20:53 - NDCG@3: 0.6285
2025-03-19 09:20:53 - NDCG@5: 0.6537
2025-03-19 09:20:53 - NDCG@10: 0.6770
2025-03-19 09:20:53 - NDCG@100: 0.7082
2025-03-19 09:20:53 - NDCG@1000: 0.7159
2025-03-19 09:20:53 - 

2025-03-19 09:20:53 - MAP@1: 0.5403
2025-03-19 09:20:53 - MAP@3: 0.6029
2025-03-19 09:20:53 - MAP@5: 0.6210
2025-03-19 09:20:53 - MAP@10: 0.6322
2025-03-19 09:20:53 - MAP@100: 0.6405
2025-03-19 09:20:53 - MAP@1000: 0.6407
2025-03-19 09:20:53 - 

2025-03-19 09:20:53 - Recall@1: 0.5403
2025-03-19 09:20:53 - Recall@3: 0.6726
2025-03-19 09:20:53 - Recall@5: 0.7350
2025-03-19 09:20:53 - Recall@10: 0.8012
2025-03-19 09:20:53 - Recall@100: 0.9333
2025-03-19 09:20:53 - Recall@1000: 0.9967
2025-03-19 09:20:53 - 

2025-03-19 09:20:53 - P@1: 0.5667
2025-03-19 09:20:53

In [22]:
print(f"Model: BMRet; Dataset: {dataset} (paraphrased)")
print("-" * 150)
print(ndcg)
print(_map)
print(recall)
print(precision)

Model: BMRet; Dataset: scifact (paraphrased)
------------------------------------------------------------------------------------------------------------------------------------------------------
{'NDCG@1': 0.56667, 'NDCG@3': 0.62847, 'NDCG@5': 0.65373, 'NDCG@10': 0.67702, 'NDCG@100': 0.70823, 'NDCG@1000': 0.7159}
{'MAP@1': 0.54028, 'MAP@3': 0.60287, 'MAP@5': 0.62103, 'MAP@10': 0.6322, 'MAP@100': 0.64049, 'MAP@1000': 0.64072}
{'Recall@1': 0.54028, 'Recall@3': 0.67256, 'Recall@5': 0.735, 'Recall@10': 0.80122, 'Recall@100': 0.93333, 'Recall@1000': 0.99667}
{'P@1': 0.56667, 'P@3': 0.24556, 'P@5': 0.164, 'P@10': 0.09067, 'P@100': 0.01063, 'P@1000': 0.00113}


In [23]:
bm_ret = DRES(SentenceBERT(model_path="BMRetriever/BMRetriever-410M"), dataset = 'trec-covid', batch_size=32)
retriever_bm_ret = EvaluateRetrieval(bm_ret, score_function="dot") # or "cos_sim" for cosine similarity

In [24]:
# Download and load dataset
dataset = "trec-covid" # dataset name
url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
out_dir = "datasets"
data_path = util.download_and_unzip(url, out_dir)

2025-03-19 09:23:05 - Downloading trec-covid.zip ...


datasets/trec-covid.zip:   0%|          | 0.00/70.5M [00:00<?, ?iB/s]

2025-03-19 09:23:12 - Unzipping trec-covid.zip ...


In [25]:
# Load corpus, queries, and qrels
corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")

2025-03-19 09:23:48 - Loading Corpus...


  0%|          | 0/171332 [00:00<?, ?it/s]

2025-03-19 09:23:50 - Loaded 171332 TEST Documents.
2025-03-19 09:23:50 - Doc Example: {'text': 'OBJECTIVE: This retrospective chart review describes the epidemiology and clinical features of 40 patients with culture-proven Mycoplasma pneumoniae infections at King Abdulaziz University Hospital, Jeddah, Saudi Arabia. METHODS: Patients with positive M. pneumoniae cultures from respiratory specimens from January 1997 through December 1998 were identified through the Microbiology records. Charts of patients were reviewed. RESULTS: 40 patients were identified, 33 (82.5%) of whom required admission. Most infections (92.5%) were community-acquired. The infection affected all age groups but was most common in infants (32.5%) and pre-school children (22.5%). It occurred year-round but was most common in the fall (35%) and spring (30%). More than three-quarters of patients (77.5%) had comorbidities. Twenty-four isolates (60%) were associated with pneumonia, 14 (35%) with upper respiratory tract 

In [26]:
import json
# load paraphrased dataset
with open(f"{dataset}_query_paraphrased_gpt4o.json", encoding='utf-8') as f:
    # Load the JSON data into a Python dictionary
    queries_para = json.load(f)

In [27]:
queries_p = {}
for q in queries_para:
  queries_p[q] = queries_para[q]['query_p']

In [28]:
results_bm_ret = retriever_bm_ret.retrieve(corpus, queries_p)

2025-03-19 09:24:27 - Encoding Queries...


  with torch.cuda.amp.autocast():
You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 2/2 [00:00<00:00, 19.57it/s]

2025-03-19 09:24:27 - Embeddings shape: torch.Size([50, 1024])
2025-03-19 09:24:27 - Sorting Corpus by document length (Longest first)...





2025-03-19 09:24:28 - Scoring Function: Dot Product (dot)
2025-03-19 09:24:28 - Encoding Batch 1/4...


100%|██████████| 1563/1563 [05:50<00:00,  4.46it/s]


2025-03-19 09:30:19 - Embeddings shape: torch.Size([50000, 1024])
2025-03-19 09:30:19 - Encoding Batch 2/4...


100%|██████████| 1563/1563 [04:06<00:00,  6.34it/s]


2025-03-19 09:34:25 - Embeddings shape: torch.Size([50000, 1024])
2025-03-19 09:34:25 - Encoding Batch 3/4...


100%|██████████| 1563/1563 [01:43<00:00, 15.09it/s]


2025-03-19 09:36:09 - Embeddings shape: torch.Size([50000, 1024])
2025-03-19 09:36:09 - Encoding Batch 4/4...


100%|██████████| 667/667 [00:14<00:00, 44.53it/s]

2025-03-19 09:36:24 - Embeddings shape: torch.Size([21332, 1024])





In [29]:
# Evaluate your model with NDCG@k, MAP@K, Recall@K and Precision@K where k = [1,3,5,10,100,1000]
ndcg, _map, recall, precision = retriever_bm_ret.evaluate(qrels, results_bm_ret, retriever_bm_ret.k_values)

2025-03-19 09:37:33 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-03-19 09:37:33 - 

2025-03-19 09:37:33 - NDCG@1: 0.5900
2025-03-19 09:37:33 - NDCG@3: 0.6253
2025-03-19 09:37:33 - NDCG@5: 0.6225
2025-03-19 09:37:33 - NDCG@10: 0.6062
2025-03-19 09:37:33 - NDCG@100: 0.4617
2025-03-19 09:37:33 - NDCG@1000: 0.4179
2025-03-19 09:37:33 - 

2025-03-19 09:37:33 - MAP@1: 0.0018
2025-03-19 09:37:33 - MAP@3: 0.0055
2025-03-19 09:37:33 - MAP@5: 0.0087
2025-03-19 09:37:33 - MAP@10: 0.0162
2025-03-19 09:37:33 - MAP@100: 0.0864
2025-03-19 09:37:33 - MAP@1000: 0.1984
2025-03-19 09:37:33 - 

2025-03-19 09:37:33 - Recall@1: 0.0018
2025-03-19 09:37:33 - Recall@3: 0.0058
2025-03-19 09:37:33 - Recall@5: 0.0096
2025-03-19 09:37:33 - Recall@10: 0.0185
2025-03-19 09:37:33 - Recall@100: 0.1175
2025-03-19 09:37:33 - Recall@1000: 0.3922
2025-03-19 09:37:33 - 

2025-03-19 09:37:33 - P@1: 0.6400
2025-03-19 09:37:33

In [30]:
print(f"Model: BMRet; Dataset: {dataset} (paraphrased)")
print("-" * 150)
print(ndcg)
print(_map)
print(recall)
print(precision)

Model: BMRet; Dataset: trec-covid (paraphrased)
------------------------------------------------------------------------------------------------------------------------------------------------------
{'NDCG@1': 0.59, 'NDCG@3': 0.62531, 'NDCG@5': 0.62252, 'NDCG@10': 0.60625, 'NDCG@100': 0.46169, 'NDCG@1000': 0.41788}
{'MAP@1': 0.00184, 'MAP@3': 0.00548, 'MAP@5': 0.00872, 'MAP@10': 0.01615, 'MAP@100': 0.08639, 'MAP@1000': 0.19845}
{'Recall@1': 0.00184, 'Recall@3': 0.00582, 'Recall@5': 0.00964, 'Recall@10': 0.01848, 'Recall@100': 0.11753, 'Recall@1000': 0.39218}
{'P@1': 0.64, 'P@3': 0.7, 'P@5': 0.692, 'P@10': 0.66, 'P@100': 0.4774, 'P@1000': 0.18214}
