# Text Retrieval with Multi-Stage Re-Ranking Models

In [1]:
!python -V

Python 3.10.15


In [1]:
import torch
print(torch.version.cuda)  # Check the CUDA version
print(torch.cuda.is_available())  # Check if CUDA is available

11.8
True


# Model

# Dataset

## Preprocess

In [1]:
%%bash
cd /home/hoang/multi-stage-reranking
python preprocess_beir.py \
--data_path dataset/beir/original/12_7 \
--output_data_path dataset/beir/processed/12_7_v2 \
--model_name_or_path FacebookAI/roberta-large

  0%|          | 0/743 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (594 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 743/743 [00:00<00:00, 765.68it/s]
100%|██████████| 3462/3462 [00:01<00:00, 2352.16it/s]
3459it [00:00, 248022.87it/s]
101it [00:00, 475982.81it/s]
292it [00:00, 613903.14it/s]


In [2]:
!du -h -d 1 /home/hoang/multi-stage-reranking/dataset/beir/processed

280M	/home/hoang/multi-stage-reranking/dataset/beir/processed/msmarco
21M	/home/hoang/multi-stage-reranking/dataset/beir/processed/12_7_v2
8.5M	/home/hoang/multi-stage-reranking/dataset/beir/processed/12_7
309M	/home/hoang/multi-stage-reranking/dataset/beir/processed


In [None]:
# !pip install pyserini==0.19.2 numpy==1.26.1 faiss-cpu==1.7.4

In [4]:
%%bash
cd /home/hoang/multi-stage-reranking
python preprocess_bm25.py \
--data_path dataset/beir/processed/12_7_v2 \
--output_data_path dataset/beir/processed_bm25/12_7_v2 \
--bm25_num_candidate 300

2024-11-07 06:21:10,408 INFO  [main] index.IndexCollection (IndexCollection.java:391) - Setting log level to INFO
2024-11-07 06:21:10,411 INFO  [main] index.IndexCollection (IndexCollection.java:394) - Starting indexer...
2024-11-07 06:21:10,412 INFO  [main] index.IndexCollection (IndexCollection.java:396) - DocumentCollection path: dataset/beir/processed_bm25/12_7_v2/document_processed
2024-11-07 06:21:10,412 INFO  [main] index.IndexCollection (IndexCollection.java:397) - CollectionClass: JsonCollection
2024-11-07 06:21:10,413 INFO  [main] index.IndexCollection (IndexCollection.java:398) - Generator: DefaultLuceneDocumentGenerator
2024-11-07 06:21:10,413 INFO  [main] index.IndexCollection (IndexCollection.java:399) - Threads: 1
2024-11-07 06:21:10,413 INFO  [main] index.IndexCollection (IndexCollection.java:400) - Language: en
2024-11-07 06:21:10,414 INFO  [main] index.IndexCollection (IndexCollection.java:401) - Stemmer: porter
2024-11-07 06:21:10,414 INFO  [main] index.IndexCollecti

100%|██████████| 3259/3259 [00:47<00:00, 68.65it/s]
100%|██████████| 73/73 [00:00<00:00, 85.64it/s]
100%|██████████| 130/130 [00:01<00:00, 70.12it/s]


In [5]:
!du -h -d 1 /home/hoang/multi-stage-reranking/dataset/beir/processed_bm25

214M	/home/hoang/multi-stage-reranking/dataset/beir/processed_bm25/msmarco
27M	/home/hoang/multi-stage-reranking/dataset/beir/processed_bm25/12_7_v2
27M	/home/hoang/multi-stage-reranking/dataset/beir/processed_bm25/12_7
268M	/home/hoang/multi-stage-reranking/dataset/beir/processed_bm25


# Training

## Normal (pointwise) LM

### seed=0

In [6]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/pre_trained_models/MiniLM-L6-H384-distilled-from-RoBERTa-Large \
--tokenizer_name_or_path FacebookAI/roberta-large \
--do_train \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 0 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking
11/07/2024 06:23:45 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2', do_train=True, do_eval=False, do_test=False, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=0, data_seed=None, n_gpu=1, device='cuda', fp16=True,

In [7]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--tokenizer_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--do_test \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 0 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking


11/07/2024 06:26:26 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2', do_train=False, do_eval=False, do_test=True, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=0, data_seed=None, n_gpu=1, device='cuda', fp16=True, ignore_index=-100, data_size=1000

### seed=1

In [8]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/pre_trained_models/MiniLM-L6-H384-distilled-from-RoBERTa-Large \
--tokenizer_name_or_path FacebookAI/roberta-large \
--do_train \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 1 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking
11/07/2024 06:26:37 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2', do_train=True, do_eval=False, do_test=False, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=1, data_seed=None, n_gpu=1, device='cuda', fp16=True,

In [9]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
--tokenizer_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
--do_test \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 1 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking
11/07/2024 06:28:52 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2', do_train=False, do_eval=False, do_test=True, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=1, data_seed=None, n_gpu=1, device='cuda', fp16=True,

### seed=2

In [10]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/pre_trained_models/MiniLM-L6-H384-distilled-from-RoBERTa-Large \
--tokenizer_name_or_path FacebookAI/roberta-large \
--do_train \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 2 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking


11/07/2024 06:29:05 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2', do_train=True, do_eval=False, do_test=False, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=2, data_seed=None, n_gpu=1, device='cuda', fp16=True, ignore_index=-100, data_size=1000

In [11]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python train.py \
--model_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2 \
--tokenizer_name_or_path ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2 \
--do_test \
--task_type classification --negative_doc_cand_type all \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--train_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/train.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/dev.json \
--output_dir ./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2 \
--num_train_epochs 10 --learning_rate 5e-5 --seed 2 \
--per_device_train_batch_size 16 --per_device_eval_batch_size 16 \
--per_device_generate_batch_size 16 --total_batch_size 64 \
--source_block_size 128 --n_gpu 1 --device cuda --fp16

/home/hoang/multi-stage-reranking


11/07/2024 06:31:24 - INFO - __main__ -   Training/evaluation parameters Namespace(id2doc_path='dataset/beir/processed/12_7_v2/document.json', id2query_path='dataset/beir/processed/12_7_v2/query.json', train_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/train.json', eval_query2doc_path='dataset/beir/processed_bm25/12_7_v2/qrels/dev.json', test_query2doc_path=None, source_block_size=128, target_block_size=128, local_rank=-1, output_dir='./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2', do_train=False, do_eval=False, do_test=True, do_generate=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_device_generate_batch_size=16, total_batch_size=64, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.1, num_train_epochs=10, eval_freq=1, seed=2, data_seed=None, n_gpu=1, device='cuda', fp16=True, ignore_index=-100, data_size=1000

## Larger LM

## Pairwise LM

# Evaluation

## Only BM25

In [12]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python -u evaluate.py \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/test.json \
--use_bm25

/home/hoang/multi-stage-reranking
100%|███████████████████████████████████████| 130/130 [00:00<00:00, 2305.92it/s]
Search time:0.05939912796020508
MRR@10: 0.3857
MAP@10: 0.2631
Recall@1: 0.1619		My_recall@1: 0.2615
Recall@3: 0.2983		My_recall@3: 0.3103
Recall@5: 0.3614		My_recall@5: 0.3659
Recall@10: 0.4399		My_recall@10: 0.4404
Recall@20: 0.5448		My_recall@20: 0.5448
Recall@50: 0.6613		My_recall@50: 0.6613
Recall@100: 0.7145		My_recall@100: 0.7145
Recall@200: 0.8313		My_recall@200: 0.8313


## Only Normal LM

In [13]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python -u evaluate.py \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/test.json \
--batch_size 16 \
--bert_num_candidate 200 \
--source_block_size 128 \
--bert_task_type classification \
--use_bert \
--model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2

/home/hoang/multi-stage-reranking
100%|█████████████████████████████████████████| 130/130 [00:21<00:00,  6.19it/s]
Search time:21.009658098220825
MRR@10: 0.0000
MAP@10: 0.0000
Recall@1: 0.0000		My_recall@1: 0.0000
Recall@3: 0.0000		My_recall@3: 0.0000
Recall@5: 0.0000		My_recall@5: 0.0000
Recall@10: 0.0000		My_recall@10: 0.0000
Recall@20: 0.0000		My_recall@20: 0.0000
Recall@50: 0.0082		My_recall@50: 0.0082
Recall@100: 0.0169		My_recall@100: 0.0169
Recall@200: 0.0423		My_recall@200: 0.0423


## BM25 + Normal LM

In [16]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python -u evaluate.py \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/test.json \
--batch_size 16 \
--bert_num_candidate 200 \
--source_block_size 128 \
--bert_task_type classification \
--use_bm25 --use_bert \
--model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2

/home/hoang/multi-stage-reranking
100%|█████████████████████████████████████████| 130/130 [00:18<00:00,  7.22it/s]
Search time:18.01241397857666
MRR@10: 0.3582
MAP@10: 0.2361
Recall@1: 0.1542		My_recall@1: 0.2538
Recall@3: 0.2521		My_recall@3: 0.2641
Recall@5: 0.3076		My_recall@5: 0.3121
Recall@10: 0.4053		My_recall@10: 0.4058
Recall@20: 0.4935		My_recall@20: 0.4935
Recall@50: 0.6023		My_recall@50: 0.6023
Recall@100: 0.6632		My_recall@100: 0.6632
Recall@200: 0.8313		My_recall@200: 0.8313


## Normal LM + Ensemble

In [17]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python -u evaluate.py \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/test.json \
--batch_size 16 \
--bert_num_candidate 250 --second_bert_num_candidate 200 \
--source_block_size 128 --second_source_block_size 128 \
--bert_task_type classification --second_bert_task_type classification \
--use_bert --use_second_bert \
--model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--second_model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2

/home/hoang/multi-stage-reranking
100%|█████████████████████████████████████████| 130/130 [01:09<00:00,  1.88it/s]
Search time:69.32526755332947
MRR@10: 0.0009
MAP@10: 0.0009
Recall@1: 0.0000		My_recall@1: 0.0000
Recall@3: 0.0000		My_recall@3: 0.0000
Recall@5: 0.0000		My_recall@5: 0.0000
Recall@10: 0.0077		My_recall@10: 0.0077
Recall@20: 0.0077		My_recall@20: 0.0077
Recall@50: 0.0159		My_recall@50: 0.0159
Recall@100: 0.0362		My_recall@100: 0.0362
Recall@200: 0.0846		My_recall@200: 0.0846


## BM25 + Normal LM + Ensemble

In [18]:
%cd /home/hoang/multi-stage-reranking
!CUDA_VISIBLE_DEVICES=0 python -u evaluate.py \
--id2doc_path dataset/beir/processed/12_7_v2/document.json \
--id2query_path dataset/beir/processed/12_7_v2/query.json \
--eval_query2doc_path dataset/beir/processed_bm25/12_7_v2/qrels/test.json \
--batch_size 16 \
--bert_num_candidate 250 --second_bert_num_candidate 200 \
--source_block_size 128 --second_source_block_size 128 \
--bert_task_type classification --second_bert_task_type classification \
--use_bm25 --use_bert --use_second_bert \
--model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
--second_model_name_or_path \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s0_128_v2 \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s1_128_v2 \
./model/fine_tuned_models/MiniLM_L6_H384_msmarco_classification_all_e10_ns1_lr5e-5_s2_128_v2

/home/hoang/multi-stage-reranking


100%|█████████████████████████████████████████| 130/130 [01:11<00:00,  1.81it/s]
Search time:71.67794370651245
MRR@10: 0.3564
MAP@10: 0.2340
Recall@1: 0.1542		My_recall@1: 0.2538
Recall@3: 0.2521		My_recall@3: 0.2641
Recall@5: 0.3076		My_recall@5: 0.3121
Recall@10: 0.3796		My_recall@10: 0.3801
Recall@20: 0.4987		My_recall@20: 0.4987
Recall@50: 0.5869		My_recall@50: 0.5869
Recall@100: 0.6709		My_recall@100: 0.6709
Recall@200: 0.8159		My_recall@200: 0.8159


## Only Larger LM

## BM25 + Larger LM

## Normal LM + Larger LM

## BM25 + Normal LM + Larger LM

## BM25 + Normal LM + Pairwise LM