# Pairwise Retrieval Experiments on the Vaswani Corpus

### Preparation + Tests + Imports

In [2]:
# Run the project's unit tests first so a broken checkout is noticed before any experiment starts.
!nosetests ../tests

import sys
# Put ../src at the front of the module search path; the retrieval_pipelines
# imports below presumably resolve from there, so this line must precede them.
sys.path.insert(0, "../src")
import pyterrier as pt
from pyterrier_t5 import MonoT5ReRanker, DuoT5ReRanker
from retrieval_pipelines.FileSystemCache import FileSystemCache
from retrieval_pipelines.DuoT5ReRankerWithCache import DuoT5ReRankerWithCache
# Root directory of the pairwise-score caches. Later cells append a model-specific
# name with plain string concatenation, so the trailing '/' is required.
# NOTE(review): absolute path -- presumably only valid on the machine/cluster where
# this storage is mounted; confirm before running elsewhere.
DIR = '/mnt/ceph/storage/data-in-progress/data-research/web-search/SIGIR-22/sigir22-pairwise-ranking/pairwise-cache/vaswani/'

# Initialise PyTerrier only if it has not been started yet, so the cell can be re-run.
if not pt.started():
    pt.init()
    
# Dataset handle used by the cells below for topics, qrels and document text.
vaswani = pt.get_dataset("irds:vaswani")

....
----------------------------------------------------------------------
Ran 4 tests in 0.002s

OK


### DuoT5 Base


In [5]:
# Pairwise DuoT5 (base) re-ranker. The cache object is created first and handed
# to the re-ranker; it is rooted in a model-specific sub-directory of DIR.
duo_t5_base_cache = FileSystemCache(DIR + 'castorini-duot5-base-msmarco')

duo_t5_base = DuoT5ReRankerWithCache(
    model='castorini/duot5-base-msmarco',
    tok_model='t5-base',
    cache=duo_t5_base_cache,
    batch_size=32,
)


### Experiments with DuoT5 Base

In [6]:
# First-stage retriever: BM25 over the pre-built Vaswani index.
bm25 = pt.BatchRetrieve(pt.get_dataset("vaswani").get_index(), wmodel="BM25")

# Re-ranking pipeline: BM25 top-50 -> attach document text -> DuoT5 (base).
# Python's `%` binds tighter than `>>`, so the unparenthesised form
# `bm25 >> get_text(...) % 50 >> reranker` attaches the cutoff to the text
# loader: text is fetched for every BM25 hit and only then truncated to 50.
# Applying the cutoff directly to bm25 yields the same 50 documents per query
# while loading text only for the documents that are actually re-ranked.
duoT5 = (bm25 % 50) >> pt.text.get_text(vaswani, "text") >> duo_t5_base

In [7]:
# Compare plain BM25 with the BM25 >> DuoT5 (base) pipeline on the Vaswani
# topics and qrels, reporting MAP and nDCG. System 0 (BM25) is the baseline for
# the significance tests; correction='b' requests a multiple-testing correction
# (presumably Bonferroni -- confirm against the PyTerrier documentation).
pt.Experiment(
    retr_systems=[bm25, duoT5],
    topics=vaswani.get_topics(),
    qrels=vaswani.get_qrels(),
    eval_metrics=['map', 'ndcg'],
    names=['BM25', 'DuoT5'],
    baseline=0,
    correction='b',
)

duoT5:   0%|                                                                                                         | 0/93 [00:00<?, ?queries/s]


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

### DuoT5 3B


In [None]:
# Pairwise DuoT5 (3B) re-ranker with its own file-system cache directory.
# It gets a dedicated name so that `duo_t5_b3` below refers to exactly one
# thing (the full pipeline) instead of being rebound from re-ranker to pipeline.
duo_t5_b3_reranker = DuoT5ReRankerWithCache(
    tok_model='t5-3b',
    model='castorini/duot5-3b-msmarco',
    batch_size=32,
    cache=FileSystemCache(DIR + 'castorini-duot5-b3-msmarco')
)

# BM25 is rebuilt here so this cell does not rely on the DuoT5 Base section
# having been executed.
bm25 = pt.BatchRetrieve(pt.get_dataset("vaswani").get_index(), wmodel="BM25")

# Re-ranking pipeline: BM25 top-50 -> attach document text -> DuoT5 (3B).
# `%` binds tighter than `>>`, so the cutoff is parenthesised onto bm25;
# otherwise it would wrap the text loader and text would be fetched for every
# BM25 hit before truncating to 50. The resulting ranking is the same.
duo_t5_b3 = (bm25 % 50) >> pt.text.get_text(vaswani, "text") >> duo_t5_b3_reranker

In [None]:
# Compare plain BM25 with the BM25 >> DuoT5 (3B) pipeline on the Vaswani topics
# and qrels, reporting MAP and nDCG. System 0 (BM25) is the baseline for the
# significance tests; correction='b' requests a multiple-testing correction
# (presumably Bonferroni -- confirm against the PyTerrier documentation).
pt.Experiment(
    retr_systems=[bm25, duo_t5_b3],
    topics=vaswani.get_topics(),
    qrels=vaswani.get_qrels(),
    eval_metrics=['map', 'ndcg'],
    names=['BM25', 'DuoT5'],
    baseline=0,
    correction='b',
)