# Cross-Encoder Fine-Tuning & Evaluation Notebook

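This notebook walks through fine-tuning a Sentence-Transformers `CrossEncoder` end to end: configuration, a user-level train/test split, training, evaluation on held-out users, and reranking inference.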

## 1. Install & Import Dependencies

In [None]:
import os
import random

import numpy as np
import pandas as pd
import torch
from sentence_transformers import CrossEncoder, InputExample
from sentence_transformers.cross_encoder.evaluation import CEBinaryClassificationEvaluator
from sklearn.metrics import ndcg_score, roc_auc_score, average_precision_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

## 2. Configuration

In [None]:
# Paths
# NOTE(review): both literals were garbled in the original cell (the model
# directory had been spliced into the middle of the data path, leaving a
# syntax error). They are reconstructed here from the fragments — confirm
# they match your project layout.
data_path = 'data/training_pairs.parquet'  # your tuning pairs
output_model_dir = 'models/crossencoder_book_reranker'  # where fit() writes the model

# Training parameters
test_size = 0.1        # fraction of users held out for validation/test
epochs = 2
batch_size = 16
learning_rate = 2e-5
warmup_steps = 100
random_seed = 42       # shared by the RNG seeding and the user-level split

In [None]:
# Reproducibility: seed every RNG the pipeline can draw from, not just the
# stdlib one. torch's generator drives DataLoader shuffling and weight
# initialisation, so seeding `random` alone leaves training non-deterministic.
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

## 3. Load & Split Data


# Read the full table of (user, book) pairs from the configured parquet file
df = pd.read_parquet(data_path)

# Hold out whole users rather than individual rows, so that no user
# contributes pairs to both the training and the test split
users = df['user_id'].unique()
train_users, test_users = train_test_split(
    users,
    test_size=test_size,
    random_state=random_seed,
)

# Row masks selecting each split by user membership
in_train = df['user_id'].isin(train_users)
in_test = df['user_id'].isin(test_users)

train_df = df[in_train].reset_index(drop=True)
test_df = df[in_test].reset_index(drop=True)

print(f"Train pairs: {len(train_df)}, Test pairs: {len(test_df)}")


## 4. Prepare InputExamples & DataLoaders

In [None]:

def _as_examples(pairs_df):
    """Build one `InputExample` per row of `pairs_df`.

    Each example pairs the row's `user_ctx` with its `book_text` and carries
    the row's `label` cast to float.
    """
    examples = []
    for rec in pairs_df.itertuples():
        examples.append(
            InputExample(texts=[rec.user_ctx, rec.book_text], label=float(rec.label))
        )
    return examples


# Same conversion for both splits
train_examples = _as_examples(train_df)
val_examples = _as_examples(test_df)

# Batch the examples; only the training loader is shuffled
train_dataloader = DataLoader(train_examples, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_examples, batch_size=batch_size, shuffle=False)


## 5. Instantiate & Fine-Tune CrossEncoder

In [None]:
# Start from the DistilBERT base checkpoint with a single output
# (num_labels=1); inputs are capped at max_length=256.
model = CrossEncoder(
    'distilbert-base-uncased',
    num_labels=1,
    max_length=256,
)

# `CrossEncoder.fit` has no `evaluation_dataloader` argument; validation is
# supplied through an evaluator object, here built from the held-out examples.
# NOTE(review): this evaluator assumes binary 0/1 labels — confirm for your data.
val_evaluator = CEBinaryClassificationEvaluator.from_input_examples(val_examples, name='val')

model.fit(
    train_dataloader=train_dataloader,
    evaluator=val_evaluator,
    epochs=epochs,
    warmup_steps=warmup_steps,
    optimizer_params={'lr': learning_rate},
    evaluation_steps=1000,  # run the evaluator every 1000 training steps
    output_path=output_model_dir,
)

## 6. Evaluation on Test Set

In [None]:
# Score every held-out (user context, book text) pair with the fine-tuned model
test_pairs = [[row.user_ctx, row.book_text] for row in test_df.itertuples()]
scores = model.predict(test_pairs)
labels = test_df['label'].values

# Classification-style metrics computed over all test pairs at once
auc = roc_auc_score(labels, scores)
ap = average_precision_score(labels, scores)

# NDCG is a per-query ranking metric. Passing the whole test set as a single
# query would rank books across different users against each other, so compute
# NDCG@10 inside each user's candidate list and average over users instead.
scored_df = test_df.assign(score=scores)
per_user_ndcg = [
    ndcg_score([group['label'].to_numpy()], [group['score'].to_numpy()], k=10)
    for _, group in scored_df.groupby('user_id')
    if len(group) > 1  # a one-item list has nothing to rank; ndcg_score rejects it
]
ndcg = sum(per_user_ndcg) / len(per_user_ndcg) if per_user_ndcg else float('nan')

print(f"ROC AUC: {auc:.4f}")
print(f"Average Precision: {ap:.4f}")
print(f"NDCG@10: {ndcg:.4f}")


## 7. Save & Load Model

In [None]:
# `fit` was pointed at `output_model_dir` via `output_path`, so the fine-tuned
# model is reloaded from that directory. `CrossEncoder` is already imported in
# the notebook's import cell, so it is not re-imported here.
loaded_model = CrossEncoder(output_model_dir)


## 8. Inference Example

In [None]:
# Illustrative inputs for a single user — replace these placeholders with real
# data. The lists hold concrete strings (not a literal `...`, which would put
# an Ellipsis object into the batch sent to `predict`).
user_ctx = "Favorite books: ..."  # fetched or precomputed
candidates = ['book1', 'book2']
candidate_texts = [
    "Title: ... Description: ...",
    "Title: ... Description: ...",
]

# `zip` silently truncates to the shorter input, so fail loudly on a mismatch
if len(candidates) != len(candidate_texts):
    raise ValueError(
        f"candidates ({len(candidates)}) and candidate_texts "
        f"({len(candidate_texts)}) must have the same length"
    )

# One (user context, candidate text) pair per candidate
pairs = [[user_ctx, txt] for txt in candidate_texts]
# Note: this rebinds `scores`, replacing the test-set scores from the evaluation cell
scores = loaded_model.predict(pairs)

# Rerank: highest score first, then show the top 10
ranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)
print(ranked[:10])