## Getting started (Adjust settings to your experiment's needs)

In [16]:
import os
import logging
import torch
from torch.optim import AdamW
from ir_measures import nDCG, AP, P, R, RR
from IRutils import models, train, inference
from IRutils.load_data import load, preprocess

#################### THINGS TO CHANGE FOR YOUR EXPERIMENTS ####################

dataset_name = "quora"  # SELECT YOUR EXPERIMENT DATASET HERE
model_name = "nreimers/MiniLM-L6-H384-uncased"  # SELECT YOUR MODEL HERE
# Some options:
#   DistilBERT: "distilbert-base-uncased" (66M params)
#   BERT: "bert-base-uncased" (110M params) or "bert-large-uncased" (340M params)
#   RoBERTa: "roberta-base" (125M params) or "roberta-large" (355M params)
#   ALBERT: "albert-base-v2" (12M params) or "albert-xxlarge-v2" (235M params)
#   ELECTRA: "google/electra-small-generator" (14M params) or "google/electra-base-generator" (110M params)
#   DeBERTa: "microsoft/deberta-base" (140M params) or "microsoft/deberta-v3-base" (184M params)
#   MPNet: "microsoft/mpnet-base" (110M params)
#   XLM-RoBERTa: "xlm-roberta-base" (125M params) or "xlm-roberta-large" (355M params)
#   T5: "t5-small" (60M params) or "t5-base" (220M params)
#   BART: "facebook/bart-base" (140M params) or "facebook/bart-large" (406M params)
#   LongFormer: "allenai/longformer-base-4096" (149M params)
#
# For the distilled/smaller variants that are closer to DistilBERT in size and speed:
#
#   TinyBERT: "huawei-noah/TinyBERT_General_4L_312D" (15M params)
#   MobileBERT: "google/mobilebert-uncased" (25M params)
#   DistilRoBERTa: "distilroberta-base" (82M params)

# Create dataset for a specific query length range (e.g., short queries).
# Options:
#   short  - 0-33 percentile (length)
#   medium - 33-67 percentile (length)
#   long   - 67-100 percentile (length)
#   full   - all data (sampled to one third)
length_setting = 'long'

# Evaluation measures reported after inference (ir_measures objects).
metrics = [
    nDCG @ 3, nDCG @ 5, nDCG @ 10,
    RR,
    P @ 1, P @ 3, P @ 5,
    R @ 1, R @ 3, R @ 5, R @ 10,
]

#################### END OF THINGS TO CHANGE FOR YOUR EXPERIMENTS ####################

# Fail fast on a typo in the setting above instead of discovering it only
# after the slow loading / preprocessing cells have run.
_VALID_LENGTH_SETTINGS = ('short', 'medium', 'long', 'full')
if length_setting not in _VALID_LENGTH_SETTINGS:
    raise ValueError(
        f"length_setting must be one of {_VALID_LENGTH_SETTINGS}, got {length_setting!r}"
    )

# Suppress every log record of severity WARNING and below for the whole process.
logging.disable(logging.WARNING)

max_len_doc = 512  # max token length
random_state = 42

In [17]:
# Fetch the selected dataset. load() returns a flag telling whether a train
# split exists, followed by the docs/queries/qrels collections and their
# test-split counterparts.
(
    train_available,
    docs,
    queries,
    qrels,
    docs_test,
    queries_test,
    qrels_test,
) = load(dataset_name)
print('Loading complete!')

  0%|          | 0/522931 [00:00<?, ?it/s]

Only test set available!
Loading complete!


In [18]:
# The dedicated test-split collections are forwarded to preprocess() only
# when train_available is true; otherwise preprocess() is called without them.
test_split_kwargs = {}
if train_available:
    test_split_kwargs = {
        'queries_test': queries_test,
        'docs_test': docs_test,
        'qrels_test': qrels_test,
    }

(
    train_loader,
    val_loader,
    test_loader,
    split_queries_test,
    split_qrels_test,
) = preprocess(
    queries, docs, qrels, model_name, length_setting, train_available,
    **test_split_kwargs,
    max_len_doc=max_len_doc,
    random_state=random_state,
)

print('Preprocessing complete!')

8 10
Dataset size: 10000
test size: 2000
10 9223372036854775807
Example query from long subset:
('537876', 'How do Russian politics and geostrategy affect Australia and New Zealand?')
Length of subset of long validation queries: 483
Length of subset of long training queries: 1929
Length of subset of long queries: 2412
Number of negatives in qrels: 0
Creating training dataset...


 25%|██▌       | 488/1929 [00:17<00:51, 27.89it/s]


KeyboardInterrupt: 

### Initialize model

In [4]:
# Pick the GPU when one is visible to PyTorch, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build the ranker on the chosen device and attach its optimizer.
model = models.TripletRankerModel(model_name)
model = model.to(device)
learning_rate = 2e-5
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Checkpoint location: models/<model_name>/<dataset_name>/<length_setting>_queries.pth
# under the current working directory (the directory is created if missing).
model_path = os.path.join(os.getcwd(), f'models/{model_name}/{dataset_name}/{length_setting}_queries.pth')
os.makedirs(f'models/{model_name}/{dataset_name}', exist_ok=True)

### Train model (load directly if already trained)

In [None]:
# Show which checkpoint file this run reads from / trains towards.
print(model_path)
if os.path.isfile(model_path):
    # A checkpoint already exists for this model/dataset/length combination:
    # restore it instead of retraining.
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all load_state_dict() needs, so the checkpoint file cannot
    # execute arbitrary pickled code. map_location moves tensors saved on
    # another device onto the current one.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
else:
    # Train the model. model_path is handed to the trainer, presumably as the
    # location where it saves the checkpoint -- confirm in IRutils.train.
    model = train.train_triplet_ranker(model, train_loader, val_loader, optimizer, device, model_path)

C:\Users\chena\PycharmProjects\IR-rankingmodels\models/huawei-noah/TinyBERT_General_4L_312D/fiqa/full_queries.pth


Epoch 1/10 (Training):   0%|          | 0/27880 [00:00<?, ?it/s]

Epoch 1/10, Average Training Loss: 0.4634


Validation: 100%|██████████| 7533/7533 [05:55<00:00, 21.21it/s]


Validation Loss: 0.3679
Validation loss improved. Saving model.


Epoch 2/10 (Training):   0%|          | 0/27880 [00:00<?, ?it/s]

## Run inference on test set (Optional)

In [None]:
# Score the test loader. The relevance judgements come from the dataset's own
# test split when train_available is true, otherwise from the split produced
# by preprocess().
eval_qrels = qrels_test if train_available else split_qrels_test
metric_scores = inference.evaluate(model, test_loader, device, eval_qrels)

# Report every configured measure with four decimals.
for metric in metrics:
    print(f'Metric {metric} score: {metric_scores[metric]:.4f}')

## Write results to a file

In [None]:
# Results go to results/<model_name>/<dataset_name>/<length_setting>_queries.txt;
# the directory is created on first use.
results_filename = f'{length_setting}_queries.txt'
save_dir = f"results/{model_name}/{dataset_name}"
save_path = os.path.join(save_dir, results_filename)
os.makedirs(save_dir, exist_ok=True)

inference.write_results(
    metric_scores,
    save_path,
    model_name,
    dataset_name,
    length_setting,
)