In [None]:
from json import dump
from os import environ
from os.path import isfile
from pathlib import Path

from numpy import array, array_split
from pandas import DataFrame, concat
from pyterrier import started, init

# Set JAVA_HOME before initialising pyterrier (presumably init() uses it to
# locate the JVM — confirm against the deployment environment).
environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64/'
# Skip initialisation when pyterrier reports that it has already been started.
if not started():
    init()

from pyterrier.io import read_results, write_results
from pyterrier.pipelines import Experiment, KFoldGridSearch
from pyterrier.transformer import TransformerBase

In [22]:
from ir_datasets import load


class RunFileTransformer(TransformerBase):
    """Transformer that answers queries from a pre-computed run file.

    The ``run_file`` attribute starts out as ``None`` and has to be assigned
    before ``transform`` is called.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Path of the run file to serve; checked in load_approach().
        self.run_file = None

    def transform(self, r):
        """Return the rows of the run file whose ``qid`` occurs in ``r``.

        Only the ``qid`` column of ``r`` is read; query ids on both sides are
        compared as strings.
        """
        wanted_qids = set(r['qid'].astype(str).unique())
        run = self.load_approach()
        keep = run['qid'].astype(str).isin(wanted_qids)

        return run[keep]

    def load_approach(self):
        """Read ``self.run_file`` with ``read_results``; it must not be ``None``."""
        assert self.run_file is not None

        return read_results(self.run_file)

def to_config(t):
    """Turn an iterable of indexable entries into ``{entry[1]: entry[2]}``.

    Element 0 of every entry is ignored. When a key occurs more than once,
    the value of the last entry wins.
    """
    return {entry[1]: entry[2] for entry in t}

def qrels():
    """Load the 'msmarco-passage/trec-dl-2020/judged' judgments as a DataFrame.

    Returns:
        A DataFrame with one row per (topic, document) judgment and the
        columns ``qid``, ``docno``, ``label`` (cast to int) and ``iteration``
        (always 0).
    """
    judgments = load('msmarco-passage/trec-dl-2020/judged').qrels.asdict()
    rows = [
        {'qid': topic, 'docno': doc_id, 'label': int(rel), 'iteration': 0}
        for topic, labels in judgments.items()
        for doc_id, rel in labels.items()
    ]

    return DataFrame(rows)

def folds():
    """Partition the judged query ids into five topic frames.

    The distinct ``qid`` values of ``qrels()`` are sorted and cut into five
    consecutive chunks with ``array_split``.

    Returns:
        A list of five DataFrames, each with the columns ``qid`` and ``query``.
        The query text is a placeholder built from the qid.
    """
    judged_qids = array(sorted(set(qrels()['qid'].unique())))

    return [
        DataFrame([
            {'qid': qid, 'query': 'unused dummy query ' +  qid}
            for qid in chunk
        ])
        for chunk in array_split(judged_qids, 5)
    ]

def perform_grid_search(approach, run_files, measure):
    """Pick the best run file per fold for ``approach`` and persist the outcome.

    A ``KFoldGridSearch`` is run over the ``run_file`` attribute of a fresh
    ``RunFileTransformer``, using the topic folds from ``folds()`` and the
    judgments from ``qrels()``, optimising ``measure``. Two files are written
    into an output directory derived from ``approach`` and ``measure``:

    * ``params.json`` — one ``{parameter name: value}`` dict per fold,
    * ``run.txt`` — the results returned by the grid search.

    The function returns immediately when ``run.txt`` already exists, so
    finished searches are not repeated.

    Args:
        approach: Name of the approach; only used to build the output path.
        run_files: Candidate run-file paths the grid search chooses from.
        measure: Evaluation measure handed to ``KFoldGridSearch``.

    Returns:
        None.
    """
    import contextlib
    import io

    out_dir = '/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/run-' + approach + '-grid-search-for-' + measure + '/'
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    if isfile(out_dir + 'run.txt'):
        return

    # Everything printed to stdout/stderr inside this block is discarded.
    with contextlib.redirect_stderr(io.StringIO()), contextlib.redirect_stdout(io.StringIO()):
        retrieval_approach = RunFileTransformer()

        run, fold_settings = KFoldGridSearch(
            retrieval_approach,
            {retrieval_approach: {'run_file': run_files}},
            folds(),
            qrels(),
            measure,
        )

        fold_configs = [to_config(settings) for settings in fold_settings]

        # Use a context manager so the JSON file is flushed and closed
        # deterministically instead of leaking the handle.
        with open(out_dir + 'params.json', 'w') as params_file:
            dump(fold_configs, params_file)

        # run.txt doubles as the "already done" marker checked above, so it is
        # written last: a failure while saving the parameters no longer leaves
        # a half-finished result that later calls would skip.
        write_results(run, out_dir + 'run.txt')


# For every approach: the paths of its ten candidate run files (run-1 ... run-10).
ALL_RUNS = {
    approach: [
        '/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs/run-' + str(i) + '-' + approach + '.res.gz'
        for i in range(1, 11)
    ]
    for approach in ('BM25', 'KwikSort-MV', 'KwikSort-RF', 'LambdaMART')
}

In [34]:
# Run the grid search for every approach and every evaluation measure;
# perform_grid_search returns early when its run.txt already exists.
for approach, run_files in ALL_RUNS.items():
    print(approach)
    for target_measure in ('ndcg_cut.10', 'recip_rank', 'ndcg_cut.5', 'map'):
        perform_grid_search(approach, run_files, target_measure)

BM25
KwikSort-MV
KwikSort-RF
LambdaMART


In [25]:
# List the output directories created by perform_grid_search.
# NOTE(review): the recorded output shows only the ndcg_cut.10 directories; this
# cell's execution count (25) is lower than that of the grid-search cell (34),
# so it was presumably run before the other measures were computed — re-run.
!ls /mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/

run-BM25-grid-search-for-ndcg_cut.10
run-KwikSort-MV-grid-search-for-ndcg_cut.10
run-KwikSort-RF-grid-search-for-ndcg_cut.10
run-LambdaMART-grid-search-for-ndcg_cut.10


In [30]:
# Compare the cross-validated runs of all approaches on ndcg_cut.10.
measure = 'ndcg_cut.10'
runs = [
    (
        name,
        read_results('/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/run-' + name + '-grid-search-for-' + measure + '/run.txt'),
    )
    for name in ALL_RUNS
]

# baseline=0 makes the first system (the first key of ALL_RUNS) the reference
# for the significance columns of the result table.
df = Experiment(
    [results for _, results in runs],
    concat(folds()),
    qrels(),
    [measure],
    names=[name for name, _ in runs],
    baseline=0,
    test='t',
    correction='b',
)

df

Unnamed: 0,name,ndcg_cut.10,ndcg_cut.10 +,ndcg_cut.10 -,ndcg_cut.10 p-value,ndcg_cut.10 reject,ndcg_cut.10 p-value corrected
0,BM25,0.493627,,,,False,
1,KwikSort-MV,0.491858,2.0,3.0,0.190422,False,0.76169
2,KwikSort-RF,0.492215,13.0,16.0,0.897231,False,1.0
3,LambdaMART,0.498449,30.0,22.0,0.775507,False,1.0


In [31]:
# Compare the cross-validated runs of all approaches on recip_rank.
measure = 'recip_rank'
runs = [
    (
        name,
        read_results('/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/run-' + name + '-grid-search-for-' + measure + '/run.txt'),
    )
    for name in ALL_RUNS
]

# baseline=0 makes the first system (the first key of ALL_RUNS) the reference
# for the significance columns of the result table.
df = Experiment(
    [results for _, results in runs],
    concat(folds()),
    qrels(),
    [measure],
    names=[name for name, _ in runs],
    baseline=0,
    test='t',
    correction='b',
)

df

Unnamed: 0,name,recip_rank,recip_rank +,recip_rank -,recip_rank p-value,recip_rank reject,recip_rank p-value corrected
0,BM25,0.802359,,,,False,
1,KwikSort-MV,0.802102,0.0,1.0,0.321854,False,1.0
2,KwikSort-RF,0.835017,4.0,2.0,0.119284,False,0.477137
3,LambdaMART,0.831643,11.0,8.0,0.490028,False,1.0


In [33]:
# Compare the cross-validated runs of all approaches on ndcg_cut.5.
measure = 'ndcg_cut.5'
runs = [
    (
        name,
        read_results('/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/run-' + name + '-grid-search-for-' + measure + '/run.txt'),
    )
    for name in ALL_RUNS
]

# baseline=0 makes the first system (the first key of ALL_RUNS) the reference
# for the significance columns of the result table.
df = Experiment(
    [results for _, results in runs],
    concat(folds()),
    qrels(),
    [measure],
    names=[name for name, _ in runs],
    baseline=0,
    test='t',
    correction='b',
)

df

Unnamed: 0,name,ndcg_cut.5,ndcg_cut.5 +,ndcg_cut.5 -,ndcg_cut.5 p-value,ndcg_cut.5 reject,ndcg_cut.5 p-value corrected
0,BM25,0.496557,,,,False,
1,KwikSort-MV,0.495741,1.0,1.0,0.376049,False,1.0
2,KwikSort-RF,0.515843,9.0,3.0,0.083015,False,0.332059
3,LambdaMART,0.517012,30.0,19.0,0.42076,False,1.0


In [35]:
# Compare the cross-validated runs of all approaches on map.
measure = 'map'
runs = [
    (
        name,
        read_results('/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/reranking-runs-cross-validation/run-' + name + '-grid-search-for-' + measure + '/run.txt'),
    )
    for name in ALL_RUNS
]

# baseline=0 makes the first system (the first key of ALL_RUNS) the reference
# for the significance columns of the result table.
df = Experiment(
    [results for _, results in runs],
    concat(folds()),
    qrels(),
    [measure],
    names=[name for name, _ in runs],
    baseline=0,
    test='t',
    correction='b',
)

df

Unnamed: 0,name,map,map +,map -,map p-value,map reject,map p-value corrected
0,BM25,0.358724,,,,False,
1,KwikSort-MV,0.358587,2.0,8.0,0.035798,False,0.143192
2,KwikSort-RF,0.364507,15.0,18.0,0.338613,False,1.0
3,LambdaMART,0.363394,28.0,23.0,0.443931,False,1.0
