In [1]:
%load_ext autoreload
%autoreload 2
%env CUDA_VISIBLE_DEVICES=3,2,1,0

env: CUDA_VISIBLE_DEVICES=3,2,1,0


In [2]:
import logging
import os
import random
import re
import numpy as np
import torch

def set_seed(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    
    random.seed(seed)
    
    np.random.seed(seed)
    
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
def set_global_logging_level(level=logging.ERROR, prefixes=("",)):
    """
    Override logging levels of different modules based on their name as a prefix.
    It needs to be invoked after the modules have been loaded so that their loggers have been initialized.

    Args:
        level: desired level. Optional. Default is logging.ERROR
        prefixes: list of one or more str prefixes to match (e.g. ["transformers", "torch"]). Optional.
            Default is `[""]` to match all active loggers.
            The match is a case-sensitive `module_name.startswith(prefix)`
    """
    prefix_re = re.compile(fr'^(?:{"|".join(prefixes)})')
    for name in logging.root.manager.loggerDict:
        if re.match(prefix_re, name):
            logging.getLogger(name).setLevel(level)

In [3]:
set_seed(0)
# set_global_logging_level(logging.WARNING, ["elasticsearch"])

## Load examples

In [46]:
import json
samples = []
with open("data/hotpot-dev.tsv") as f:
    for line in f:
        q_id, question, answer, sp_facts = line.strip().split('\t')
        sp_facts = json.loads(sp_facts)
        samples.append((q_id, (question, answer, sp_facts)))
print(len(samples))

7405


## Load sparse retriever and query generator

In [5]:
from retriever import SparseRetriever

sparse_retriever = SparseRetriever('enwiki-20171001-paragraph-5', ['10.60.0.59:9200'], timeout=60)

In [6]:
# %env CLASSPATH=corenlp/*
from drqa.reader import Predictor
import warnings

warnings.filterwarnings("ignore")

# import os
# os.environ['CLASSPATH'] = 'corenlp/*'

qg1 = Predictor(model='ckpts/golden-retriever/hop1.mdl', tokenizer=None, embedding_file='data/glove.840B.300d.txt', num_workers=-1)
qg1.cuda()
# qg1.model.network.to(torch.device('cuda:0'))
qg2 = Predictor(model='ckpts/golden-retriever/hop2.mdl', tokenizer=None, embedding_file='data/glove.840B.300d.txt', num_workers=-1)
qg2.cuda()
# qg2.model.network.to(torch.device('cuda:0'))



## Load dense retriever and query encoder

In [7]:
import faiss
faiss.omp_set_num_threads(16)

In [8]:
from argparse import Namespace

args = Namespace(**{
    "model_name": "roberta-base",
    "model_path": "ckpts/mdr/q_encoder.pt",
    "index_prefix_path": "data/index/mdr/hotpot-paragraph-q-strict.hnsw",
    "index_buffer_size": 50000,
    "max_q_len": 70,
    "max_q_sp_len": 350
})

In [9]:
from transformers import AutoConfig, AutoTokenizer
from mdr.retrieval.models.retriever import RobertaCtxEncoder
from utils.model_utils import load_state

bert_config = AutoConfig.from_pretrained(args.model_name)
tokenizer = AutoTokenizer.from_pretrained(args.model_name)
query_encoder = RobertaCtxEncoder(bert_config, args)
query_encoder = load_state(query_encoder, args.model_path, exact=False)
device = torch.device('cuda:1')
query_encoder.to(device)
# if torch.cuda.device_count() > 1:
#     query_encoder = torch.nn.DataParallel(query_encoder)
query_encoder.eval()

RobertaCtxEncoder(
  (encoder): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [10]:
from dense_indexers import DenseHNSWFlatIndexer, DenseFlatIndexer

vector_size = bert_config.hidden_size
dense_indexer = DenseHNSWFlatIndexer(vector_size, args.index_buffer_size)
dense_indexer.deserialize_from(args.index_prefix_path)

In [11]:
from retriever import DenseRetriever

dense_retriever = DenseRetriever(dense_indexer, query_encoder, tokenizer)

## Load corpus

In [12]:
from utils.data_utils import load_corpus

corpus, title2id = load_corpus('data/corpus/hotpot-paragraph-5.tsv', for_hotpot=True, require_hyperlinks=True)
print(len(corpus))
print(len(title2id))

5232080
5232080


## Generate oracle commands

In [47]:
questions = [sample[1][0] for sample in samples]  # (N,)
questions = [q[:-1] if q.endswith('?') else q for q in questions]

In [48]:
set_global_logging_level(logging.WARNING, ["elasticsearch"])

In [16]:
import redis

query_redis = redis.Redis(host='10.60.1.79', port=6379, db=2, password='redis4zyc', decode_responses=True)
bm25_redis = redis.Redis(host='10.60.1.79', port=6379, db=3, password='redis4zyc', decode_responses=True)  # 0
mdr_redis = redis.Redis(host='10.60.1.79', port=6379, db=4, password='redis4zyc', decode_responses=True)  # 1

In [21]:
hits_list1 = dense_retriever.msearch(questions, max(RET_SIZE, 1000), args.max_q_len, 128)

100%|██████████| 707/707 [11:41<00:00,  1.01it/s]


In [29]:
for question, hits_list in zip(questions, hits_list1):
    if not mdr_redis.exists(question) or (mdr_redis.llen(question) < RET_SIZE and mdr_redis.lindex(question, -1) != 'EOL'):
        hits = hits_list[0]
        mdr_redis.delete(question)
        if len(hits) < RET_SIZE:
            mdr_redis.rpush(question, *(hits + ['EOL']))
        else:
            mdr_redis.rpush(question, *hits)

In [78]:
from html import unescape
from tqdm.auto import tqdm

from utils.data_utils import get_valid_links

faiss.omp_set_num_threads(1)


OBS_SIZE = 1
RET_SIZE = 100
HOP_MAX_STEPS = (RET_SIZE + OBS_SIZE - 1) // OBS_SIZE
all_oracle_cmds = []
all_oracle_steps = []
all_programed_steps = []
hotpot_filter = {"term": {"for_hotpot": True}}
for q_idx, (q_id, qas) in enumerate(tqdm(samples)):
    question, answer, sp_facts = qas
    if len(sp_facts) < 2:
        print(f"less than 2 supporting facts: {q_id}")
    norm_sp_titles = set(unescape(t) for t in sp_facts.keys())
    sp_ids = set(title2id[t] for t in norm_sp_titles)
    
    # ==================== hop 1 ====================
    sp_ranks = {strategy: {sp_id: 2 * RET_SIZE for sp_id in sp_ids} for strategy in ["BM25", "BM25+Link", "MDR", "MDR+Link"]}
    
    # BM25
    if not query_redis.exists(question):
        query_redis.set(question, qg1.predict(question, question)[0][0].strip())
    q1 = query_redis.get(question)
    if not bm25_redis.exists(q1) or (bm25_redis.llen(q1) < RET_SIZE and bm25_redis.lindex(q1, -1) != 'EOL'):
        hits = [hit['_id'] for hit in sparse_retriever.search(q1, RET_SIZE, filter_dic=hotpot_filter, n_retrieval=RET_SIZE * 2)]
        bm25_redis.delete(q1)
        if len(hits) < RET_SIZE:
            bm25_redis.rpush(q1, *(hits + ['EOL']))
        else:
            bm25_redis.rpush(q1, *hits)
    bm25_hits = bm25_redis.lrange(q1, 0, -1)
    if bm25_hits[-1] == 'EOL':
        bm25_hits = bm25_hits[:-1]
    for p_idx, p_id in enumerate(bm25_hits[:RET_SIZE]):
        para = corpus[p_id]
        hyperlinks = get_valid_links(para, strict=True)
        if p_id in sp_ids:
            sp_ranks['BM25'][p_id] = min(p_idx, sp_ranks['BM25'][p_id])
        for sp_title in (norm_sp_titles & set(hyperlinks.keys())):
            sp_id = title2id[sp_title]
            sp_ranks['BM25+Link'][sp_id] = min(p_idx, sp_ranks['BM25+Link'][sp_id])
        if max(list(sp_ranks['BM25'].values()) + list(sp_ranks['BM25+Link'].values())) <= p_idx:
            break
    
    # MDR
    qk = questions[q_idx]
#     if not mdr_redis.exists(qk) or (mdr_redis.llen(qk) < RET_SIZE and mdr_redis.lindex(qk, -1) != 'EOL'):
#         hits = dense_retriever.search(questions[q_idx], max(RET_SIZE, 1000), args.max_q_len)[0]
#         mdr_redis.delete(qk)
#         if len(hits) < RET_SIZE:
#             mdr_redis.rpush(qk, *(hits + ['EOL']))
#         else:
#             mdr_redis.rpush(qk, *hits)
    mdr_hits = mdr_redis.lrange(qk, 0, -1)
    if mdr_hits[-1] == 'EOL':
        mdr_hits = mdr_hits[:-1]
    assert len(mdr_hits) > 0
    for p_idx, p_id in enumerate(mdr_hits[:RET_SIZE]):
        para = corpus[p_id]
        hyperlinks = get_valid_links(para, strict=True)
        if p_id in sp_ids:
            sp_ranks['MDR'][p_id] = min(p_idx, sp_ranks['MDR'][p_id])
        for sp_title in norm_sp_titles & set(hyperlinks.keys()):
            sp_id = title2id[sp_title]
            sp_ranks['MDR+Link'][sp_id] = min(p_idx, sp_ranks['MDR+Link'][sp_id])
        if max(list(sp_ranks['MDR'].values()) + list(sp_ranks['MDR+Link'].values())) <= p_idx:
            break
    
    easy_steps, hard_steps = {}, {}
    for strategy, _sp_ranks in sp_ranks.items():
        (easy_sp_id, easy_sp_rank), (hard_sp_id, hard_sp_rank) = sorted(_sp_ranks.items(), key=lambda x: x[1])
        easy_steps[strategy] = (easy_sp_rank + OBS_SIZE) // OBS_SIZE
        hard_steps[strategy] = (hard_sp_rank + OBS_SIZE) // OBS_SIZE
        if strategy.endswith('+Link'):
            easy_steps[strategy] += 1
            hard_steps[strategy] += 1
    programed_steps = easy_steps.copy()
    
    oracle_cmds = []
    oracle_steps = 0
    recalled_sp_ids = []
    if min(min(_sp_ranks.values()) for _sp_ranks in sp_ranks.values()) >= RET_SIZE:
        print(f"Unable recall SP1 through the first {RET_SIZE} retrieval results: {q_id}")
    else:
        strategy = min(easy_steps.keys(), key=lambda k: (easy_steps[k], hard_steps[k]))
        if strategy.endswith('+Link'):
            oracle_cmds.append(f"{strategy[:-5]}({repr(q1) if strategy.startswith('BM25') else '$Q'}, {OBS_SIZE * (easy_steps[strategy] - 1)})")
            oracle_steps += easy_steps[strategy] - 1
            bridge_idx = min(sp_ranks[strategy].values())
            bridge_id = bm25_hits[bridge_idx] if strategy.startswith('BM25') else mdr_hits[bridge_idx]
            bridge_para = corpus[bridge_id]
            tgts = set(get_valid_links(bridge_para, strict=True).keys())
            assert len(norm_sp_titles & tgts) > 0
            for sp_title in (norm_sp_titles & tgts):
                oracle_cmds[-1] += f"; [{bridge_para['title']}]({sp_title})"
                oracle_steps += 1
                recalled_sp_ids.append(title2id[sp_title])
            programed_steps[strategy] = oracle_steps
        else:
            oracle_cmds.append(f"{strategy}({repr(q1) if strategy.startswith('BM25') else '$Q'}, {OBS_SIZE * easy_steps[strategy]})")
            oracle_steps += easy_steps[strategy]
            recalled_sp_ids.append(min(sp_ids, key=lambda k: sp_ranks[strategy][k]))
            if hard_steps[strategy] == easy_steps[strategy]:
                recalled_sp_ids.append(max(sp_ids, key=lambda k: sp_ranks[strategy][k]))
    
    for func_name in ['BM25', 'MDR']:
        programed_steps[func_name] = min(programed_steps[func_name], HOP_MAX_STEPS + 3)
        programed_steps[f'{func_name}+Link'] = min(programed_steps[f'{func_name}+Link'], HOP_MAX_STEPS + 3)
        if programed_steps[f'{func_name}+Link'] > programed_steps[func_name]:
            programed_steps[f'{func_name}+Link'] = programed_steps[func_name]
    programed_steps['BM25|MDR'] = programed_steps['BM25']
    programed_steps['BM25|MDR+Link'] = programed_steps['BM25+Link']
    if len(recalled_sp_ids) == 0 or set(recalled_sp_ids) == sp_ids:  # failed to recall SP1 or recalled the SP2 at the same time
        oracle_cmds.append(f"ANSWER({repr(answer) if set(recalled_sp_ids) == sp_ids else None})")
        all_oracle_cmds.append(oracle_cmds)
        all_oracle_steps.append(oracle_steps)
        all_programed_steps.append(programed_steps)
        continue

    # ==================== hop 2 ====================
    assert len(recalled_sp_ids) == 1
    sp1 = corpus[recalled_sp_ids[0]]
    sp1_title = sp1['title']
    sp2_id = (sp_ids - set(recalled_sp_ids)).pop()
    sp2 = corpus[sp2_id]
    norm_sp2_title = unescape(sp2['title'])
    sp2_ranks = {strategy: 2 * RET_SIZE for strategy in ["BM25", "BM25+Link", "MDR", "MDR+Link"]}
        
    # BM25
    obs = ' '.join([question, f"<t> {sp1['title']} </t> {sp1['text'][sp1['sentence_spans'][0][0]:sp1['sentence_spans'][-1][1]]}"])
    if not query_redis.exists(obs):
        query_redis.set(obs, qg2.predict(obs, question)[0][0].strip())
    q2 = query_redis.get(obs)
    if not bm25_redis.exists(q2) or (bm25_redis.llen(q2) < RET_SIZE and bm25_redis.lindex(q2, -1) != 'EOL'):
        hits = [hit['_id'] for hit in sparse_retriever.search(q2, RET_SIZE, filter_dic=hotpot_filter, n_retrieval=RET_SIZE * 2)]
        bm25_redis.delete(q2)
        if len(hits) < RET_SIZE:
            bm25_redis.rpush(q2, *(hits + ['EOL']))
        else:
            bm25_redis.rpush(q2, *hits)
    bm25_hits = bm25_redis.lrange(q2, 0, -1)
    if bm25_hits[-1] == 'EOL':
        bm25_hits = bm25_hits[:-1]
    for p_idx, p_id in enumerate(bm25_hits[:RET_SIZE]):
        para = corpus[p_id]
        hyperlinks = get_valid_links(para, strict=True)
        if p_id == sp2_id:
            sp2_ranks['BM25'] = min(p_idx, sp2_ranks['BM25'])
        elif norm_sp2_title in hyperlinks.keys():
            sp2_ranks['BM25+Link'] = min(p_idx, sp2_ranks['BM25+Link'])
        if max(sp2_ranks['BM25'], sp2_ranks['BM25+Link']) <= p_idx:
            break
    
    # MDR
    qk = f"{questions[q_idx]}\t+++\t{unescape(sp1['title'])}"
    if not mdr_redis.exists(qk) or (mdr_redis.llen(qk) < RET_SIZE and mdr_redis.lindex(qk, -1) != 'EOL'):
        expansion = sp1['text']
        expansion = expansion[sp1['sentence_spans'][0][0]:sp1['sentence_spans'][-1][1]]  # if strict
        expanded_query = (questions[q_idx], expansion if expansion else sp1['title'])
        hits = dense_retriever.search(expanded_query, max(RET_SIZE, 1000), args.max_q_sp_len)[0]
        mdr_redis.delete(qk)
        if len(hits) < RET_SIZE:
            mdr_redis.rpush(qk, *(hits + ['EOL']))
        else:
            mdr_redis.rpush(qk, *hits)
    mdr_hits = mdr_redis.lrange(qk, 0, -1)
    if mdr_hits[-1] == 'EOL':
        mdr_hits = mdr_hits[:-1]
    assert len(mdr_hits) > 0
    for p_idx, p_id in enumerate(mdr_hits[:RET_SIZE]):
        para = corpus[p_id]
        hyperlinks = get_valid_links(para, strict=True)
        if p_id == sp2_id:
            sp2_ranks['MDR'] = min(p_idx, sp2_ranks['BM25'])
        elif norm_sp2_title in hyperlinks.keys():
            sp2_ranks['MDR+Link'] = min(p_idx, sp2_ranks['BM25+Link'])
        if max(sp2_ranks['MDR'], sp2_ranks['MDR+Link']) <= p_idx:
            break
    
    sp2_steps = {}
    for strategy, sp2_rank in sp2_ranks.items():
        sp2_steps[strategy] = (sp2_rank + OBS_SIZE) // OBS_SIZE
        if strategy.endswith('+Link'):
            sp2_steps[strategy] += 1

    if norm_sp2_title in set(get_valid_links(sp1, strict=True)):
        oracle_cmds.append(f"[{sp1_title}]({sp2['title']})")
        oracle_steps += 1
        recalled_sp_ids.append(sp2_id)
        sp2_steps['BM25+Link'] = 1
        sp2_steps['MDR+Link'] = 1
    else:
        if min(sp2_ranks.values()) >= RET_SIZE:
            print(f"Unable recall SP2 through the first {RET_SIZE} retrieval results: {q_id}")
        else:
            strategy = min(sp2_steps.keys(), key=lambda k: sp2_steps[k])
            if strategy.endswith('+Link'):
                oracle_cmds.append(f"{strategy[:-5]}({repr(q2) if strategy.startswith('BM25') else ('$Q+${%s}' % sp1_title)}, {OBS_SIZE * (sp2_steps[strategy] - 1)})")
                oracle_steps += sp2_steps[strategy] - 1
                bridge_idx = sp2_ranks[strategy]
                bridge_id = bm25_hits[bridge_idx] if strategy.startswith('BM25') else mdr_hits[bridge_idx]
                bridge_para = corpus[bridge_id]
                tgts = set(get_valid_links(bridge_para, strict=True).keys())
                assert norm_sp2_title in tgts
                oracle_cmds[-1] += f"; [{bridge_para['title']}]({sp2['title']})"
                oracle_steps += 1
                recalled_sp_ids.append(sp2_id)
            else:
                oracle_cmds.append(f"{strategy}({repr(q1) if strategy.startswith('BM25') else ('$Q+${%s}' % sp1_title)}, {OBS_SIZE * easy_steps[strategy]})")
                oracle_steps += sp2_steps[strategy]
                recalled_sp_ids.append(sp2_id)
    
    oracle_cmds.append(f"ANSWER({repr(answer) if set(recalled_sp_ids) == sp_ids else None})")
    for func_name in ['BM25', 'MDR']:
        sp2_steps[func_name] = min(sp2_steps[func_name], HOP_MAX_STEPS + 2)
        sp2_steps[f'{func_name}+Link'] = min(sp2_steps[f'{func_name}+Link'], HOP_MAX_STEPS + 2)
        if sp2_steps[f'{func_name}+Link'] > sp2_steps[func_name]:
            sp2_steps[f'{func_name}+Link'] = sp2_steps[func_name]
    programed_steps['BM25|MDR'] += sp2_steps['MDR']
    programed_steps['BM25|MDR+Link'] += sp2_steps['MDR+Link']
    for strategy in sp2_steps.keys():
#         if sprogramed_steps[strategy] != HOP_MAX_STEPS + 3:
        programed_steps[strategy] += sp2_steps[strategy]
#     programed_steps = {strategy: programed_steps[strategy] + sp2_steps[strategy] for strategy in programed_steps.keys()}
    all_oracle_cmds.append(oracle_cmds)
    all_oracle_steps.append(oracle_steps)
    all_programed_steps.append(programed_steps)

HBox(children=(FloatProgress(value=0.0, max=7405.0), HTML(value='')))

Unable recall SP2 through the first 100 retrieval results: 5a7d54165542995f4f402256
Unable recall SP2 through the first 100 retrieval results: 5a77152355429966f1a36c2e
Unable recall SP2 through the first 100 retrieval results: 5ae7ba7a5542993210983f12
Unable recall SP1 through the first 100 retrieval results: 5ae738f75542991bbc9761c4
Unable recall SP2 through the first 100 retrieval results: 5a713a5a5542994082a3e6a9
Unable recall SP2 through the first 100 retrieval results: 5ab7f0015542992aa3b8c88b
Unable recall SP2 through the first 100 retrieval results: 5ac547525542993e66e822a3
Unable recall SP2 through the first 100 retrieval results: 5ab7f97a5542991d322237ef
Unable recall SP2 through the first 100 retrieval results: 5a77cb5e55429967ab1052a7
Unable recall SP2 through the first 100 retrieval results: 5a7f6eab5542992097ad2f66
Unable recall SP2 through the first 100 retrieval results: 5a8f69b55542997ba9cb324f
Unable recall SP1 through the first 100 retrieval results: 5ae1f596554299234

In [79]:
print(sum(all_oracle_steps) / len(all_oracle_steps))

3.737609723160027


In [80]:
avg_programed_steps = {}
for strategy in all_programed_steps[0].keys():
    avg_programed_steps[strategy] = sum(programed_steps[strategy] for programed_steps in all_programed_steps) / len(all_programed_steps)
print(avg_programed_steps)

{'BM25': 35.009858203916274, 'BM25+Link': 17.103578663065495, 'MDR': 19.42565833896016, 'MDR+Link': 13.569615124915597, 'BM25|MDR': 16.02795408507765, 'BM25|MDR+Link': 11.039837947332883}


In [81]:
print(all_oracle_steps[:10])
print(all_oracle_cmds[:10])

[2, 2, 3, 2, 2, 2, 2, 2, 2, 2]
[['MDR($Q, 1)', 'MDR($Q+${Ed Wood}, 1)', "ANSWER('yes')"], ["BM25('Corliss Archer in the film Kiss and Tell', 1)", '[Kiss and Tell (1945 film)](Shirley Temple)', "ANSWER('Chief of Protocol')"], ["BM25('set of companion books narrating the stories of enslaved worlds and alien species', 1); [The Andalite's Gift](Animorphs)", 'MDR($Q+${Animorphs}, 14)', "ANSWER('Animorphs')"], ['MDR($Q, 1)', "BM25('Laleli Mosque', 1)", "ANSWER('no')"], ['BM25(\'The director of the romantic comedy "Big Stone Gap"\', 1)', '[Big Stone Gap (film)](Adriana Trigiani)', "ANSWER('Greenwich Village, New York City')"], ["BM25('2014 S/S', 1)", '[2014 S/S](Winner (band))', "ANSWER('YG Entertainment')"], ["BM25('known by his stage name Aladin', 1)", '[Eenasul Fateh](Management consulting)', "ANSWER('Eenasul Fateh')"], ["BM25('The arena where the Lewiston Maineiacs', 1)", "BM25('The arena where the Lewiston Maineiacs', 1)", "ANSWER('3,677 seated')"], ["BM25('Annie Morton', 1)", '[Annie Mo

In [82]:
len([cmds for cmds in all_oracle_cmds if cmds[0].startswith('MDR(')])

3094

In [83]:
len([cmds for cmds in all_oracle_cmds if cmds[0].startswith('BM25(')])

4262

In [84]:
len([cmds for cmds in all_oracle_cmds if cmds[-1] == 'ANSWER(None)'])

207

<table style="border-collapse: collapse; border: none; border-spacing: 0px;">
	<tr>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); padding-right: 3pt; padding-left: 3pt;">
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); padding-right: 3pt; padding-left: 3pt;">
			1
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); padding-right: 3pt; padding-left: 3pt;">
			2
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); padding-right: 3pt; padding-left: 3pt;">
			5
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); padding-right: 3pt; padding-left: 3pt;">
			10
		</td>
	</tr>
	<tr>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: center; padding-right: 3pt; padding-left: 3pt;">
			BM25
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			35.66
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			16.34
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			7.14
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			4.32
		</td>
	</tr>
	<tr>
		<td style="text-align: center; padding-right: 3pt; padding-left: 3pt;">
			MDR
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			22.23
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			11.59
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			5.54
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			3.58
		</td>
	</tr>
	<tr>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: center; padding-right: 3pt; padding-left: 3pt;">
			BM25|MDR
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			17.95
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			9.40
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			4.62
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			3.08
		</td>
	</tr>
	<tr>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: center; padding-right: 3pt; padding-left: 3pt;">
			BM25+Link
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			16.69
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			7.38
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			3.71
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			2.62
		</td>
	</tr>
	<tr>
		<td style="text-align: center; padding-right: 3pt; padding-left: 3pt;">
			MDR+Link
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			15.17
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			8.26
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			4.27
		</td>
		<td style="text-align: right; padding-right: 3pt; padding-left: 3pt;">
			2.96
		</td>
	</tr>
	<tr>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: center; padding-right: 3pt; padding-left: 3pt;">
			BM25|MDR+Link
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			12.00
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			6.64
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			3.58
		</td>
		<td style="border-bottom: 1px solid rgb(0, 0, 0); text-align: right; padding-right: 3pt; padding-left: 3pt;">
			2.58
		</td>
	</tr>
	<tr>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: center; border-bottom: 2px solid black; padding-right: 3pt; padding-left: 3pt;">
			oracle
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; border-bottom: 2px solid black; padding-right: 3pt; padding-left: 3pt;">
			3.79
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; border-bottom: 2px solid black; padding-right: 3pt; padding-left: 3pt;">
			2.61
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; border-bottom: 2px solid black; padding-right: 3pt; padding-left: 3pt;">
			1.97
		</td>
		<td style="border-top: 1px solid rgb(0, 0, 0); text-align: right; border-bottom: 2px solid black; padding-right: 3pt; padding-left: 3pt;">
			1.76
		</td>
	</tr>
</table>