In [1]:
import json
import os
import random
import torch
from datasets import Dataset
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation

class EvidenceClaimRetriever:
    def __init__(self, model_name='all-mpnet-base-v2', batch_size=16, num_epochs=3):
        self.model = SentenceTransformer(model_name)
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.evidence_map = {}
        self.label_mapping = {
            'SUPPORTS': 1,
            'REFUTES': 1,
            'DISPUTED': 1,
            'NOT_ENOUGH_INFO': 0
        }

    def load_data(self, claims_path, evidence_path):
        """Load training data and evidence corpus"""
        # Load evidence corpus
        with open(evidence_path) as f:
            self.evidence_map = json.load(f)

        # Load training claims
        with open(claims_path) as f:
            train_claims = json.load(f)

        # Create training examples
        self.train_examples = []
        for claim_id, claim_data in train_claims.items():
            self._process_claim(claim_data)

    def _process_claim(self, claim_data):
        """Create training examples for a single claim"""
        claim_text = claim_data['claim_text']
        evidence_ids = claim_data['evidences']

        # Add positive examples
        for eid in evidence_ids:
            if eid in self.evidence_map:
                self.train_examples.append(InputExample(
                    texts=[claim_text, self.evidence_map[eid]],
                    label=self.label_mapping[claim_data['claim_label']]
                ))

        # Add negative examples
        self._add_negative_examples(claim_text, evidence_ids, claim_data['claim_label'])

    def _add_negative_examples(self, claim_text, positive_ids, label):
        """Add negative examples with proper ratio"""
        all_evidence_ids = list(self.evidence_map.keys())
        negative_candidates = list(set(all_evidence_ids) - set(positive_ids))

        # Determine number of negatives based on label
        if label == 'NOT_ENOUGH_INFO':
            num_negatives = min(2, len(negative_candidates))
        else:
            num_negatives = min(3 * len(positive_ids), len(negative_candidates))

        # Add selected negatives
        for eid in random.sample(negative_candidates, num_negatives):
            self.train_examples.append(InputExample(
                texts=[claim_text, self.evidence_map[eid]],
                label=0
            ))

    def load_dev_data(self, dev_claims_path):
        """Load separate dev set for validation"""
        with open(dev_claims_path) as f:
            dev_claims = json.load(f)

        # Prepare for InformationRetrievalEvaluator
        self.dev_queries = {}
        self.relevant_docs = {}

        for claim_id, claim_data in dev_claims.items():
            claim_text = claim_data['claim_text']
            evidence_ids = claim_data['evidences']

            self.dev_queries[claim_id] = claim_text
            self.relevant_docs[claim_id] = set(evidence_ids)

    def train(self, output_dir='./trained_model'):
        """Train using dedicated dev set for validation"""
        # Create evaluator using separate dev set
        dev_evaluator = evaluation.InformationRetrievalEvaluator(
            queries=self.dev_queries,
            corpus=self.evidence_map,
            relevant_docs=self.relevant_docs,
            show_progress_bar=True
        )

        # Create DataLoader directly from InputExamples
        train_dataloader = DataLoader(
            self.train_examples,  # Use the list of InputExamples directly
            shuffle=True,
            batch_size=self.batch_size
        )

        # Use MultipleNegativesRankingLoss
        train_loss = losses.MultipleNegativesRankingLoss(self.model)

        # Training configuration
        self.model.fit(
            train_objectives=[(train_dataloader, train_loss)],
            evaluator=dev_evaluator,
            epochs=self.num_epochs,
            evaluation_steps=100,
            warmup_steps=int(len(self.train_examples) * 0.1),
            output_path=output_dir,
            show_progress_bar=True
        )
        
    def retrieve(self, claim_text, top_k=5):
        """Retrieve evidence for a claim"""
        claim_embed = self.model.encode(claim_text)
        scores = torch.nn.functional.cosine_similarity(
            torch.tensor(claim_embed),
            torch.tensor(self.evidence_embeddings)
        )
        top_indices = torch.topk(scores, k=top_k).indices.tolist()
        return [(self.evidence_ids[i], scores[i].item()) for i in top_indices]

    def index_evidence(self, save_path='data/evidence_embeddings.pth'):
        """Precompute all evidence embeddings and optionally save them"""
        self.evidence_ids = list(self.evidence_map.keys())
        self.evidence_texts = [self.evidence_map[eid] for eid in self.evidence_ids]

        # Check if embeddings already exist
        if save_path and os.path.exists(save_path):
            print("Loading precomputed evidence embeddings...")
            data = torch.load(save_path, weights_only=False)
            self.evidence_embeddings = data['embeddings']
            self.evidence_ids = data['ids']
        else:
            print("Computing evidence embeddings...")
            self.evidence_embeddings = self.model.encode(
                self.evidence_texts,
                show_progress_bar=True,
                batch_size=32
            )
            # Save embeddings if save_path is provided
            if save_path:
                print("Saving evidence embeddings...")
                torch.save({'embeddings': self.evidence_embeddings, 'ids': self.evidence_ids}, save_path)


    

In [2]:
evidenceRetriever = EvidenceClaimRetriever(
        model_name='all-MiniLM-L6-v2',
        batch_size=32,
        num_epochs=5
    )


In [3]:

# 1. Load training data and evidence
evidenceRetriever.load_data(
    claims_path='data/train-claims.json',
    evidence_path='data/evidence.json'
)


In [4]:

# 2. Load separate dev set
evidenceRetriever.load_dev_data('data/dev-claims-baseline.json')

In [5]:

# 3. Precompute evidence embeddings
evidenceRetriever.index_evidence()

Loading precomputed evidence embeddings...


In [9]:


# 4. Train with dev set validation
# trainer.train(output_dir='./climate_retriever')




Step,Training Loss,Validation Loss,Cosine Accuracy@1,Cosine Accuracy@3,Cosine Accuracy@5,Cosine Accuracy@10,Cosine Precision@1,Cosine Precision@3,Cosine Precision@5,Cosine Precision@10,Cosine Recall@1,Cosine Recall@3,Cosine Recall@5,Cosine Recall@10,Cosine Ndcg@10,Cosine Mrr@10,Cosine Map@100
100,No log,No log,0.24026,0.396104,0.448052,0.538961,0.24026,0.164502,0.123377,0.083766,0.100758,0.188961,0.224567,0.287987,0.248052,0.331027,0.204947
200,No log,No log,0.201299,0.324675,0.38961,0.5,0.201299,0.138528,0.107792,0.078571,0.082576,0.159848,0.20671,0.280411,0.224574,0.281705,0.173821
300,No log,No log,0.194805,0.337662,0.396104,0.467532,0.194805,0.145022,0.112987,0.074675,0.081277,0.169913,0.209632,0.269372,0.22048,0.277394,0.173043
359,No log,No log,0.220779,0.350649,0.422078,0.506494,0.220779,0.147186,0.122078,0.07987,0.091017,0.173485,0.226299,0.287013,0.237954,0.305148,0.186771
400,No log,No log,0.227273,0.363636,0.428571,0.512987,0.227273,0.151515,0.123377,0.081818,0.10184,0.17803,0.227597,0.294264,0.245545,0.310774,0.194059
500,3.750600,No log,0.220779,0.38961,0.448052,0.564935,0.220779,0.166667,0.131169,0.08961,0.095671,0.190693,0.237771,0.323918,0.25984,0.325193,0.201078
600,3.750600,No log,0.227273,0.402597,0.461039,0.597403,0.227273,0.168831,0.135065,0.095455,0.094589,0.193182,0.249134,0.333009,0.266805,0.335225,0.207562
700,3.750600,No log,0.24026,0.409091,0.493506,0.616883,0.24026,0.170996,0.144156,0.094805,0.102381,0.193939,0.260065,0.338853,0.273624,0.349052,0.216047
718,3.750600,No log,0.24026,0.383117,0.493506,0.61039,0.24026,0.158009,0.146753,0.096104,0.098052,0.183333,0.268182,0.340909,0.271977,0.345552,0.212218
800,3.750600,No log,0.246753,0.396104,0.525974,0.616883,0.246753,0.160173,0.153247,0.097403,0.099351,0.185173,0.285931,0.349242,0.27815,0.355677,0.216744


Batches: 100%|██████████| 5/5 [00:00<00:00, 25.45it/s]
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.54it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 75.89it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.12it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.65it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 75.89it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.23it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.63it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.15it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.49it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.78it/s] 
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 77.75it/s] ]
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.40it/s] ]
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.98it/s] ]
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 76.99it/s] ]
Batches: 100%|██████████| 1563/1563 [00:20<00:00, 77.00it/s] ]
Batches: 1

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

class LLaMAFilter:
    def __init__(self, model_name="meta-llama/Llama-3.1-8B-Instruct"):
        self.device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name,
                                                          torch_dtype=torch.float16,
                                                          ).to(self.device)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        
    def filter_evidence(self, claim, evidence_candidates, final_k=5):
        """Filter evidence using LLaMA's relevance scoring"""
        prompts = [
            f"Claim: {claim}\nEvidence: {text}\nRate relevance 1-5 (1 is not relevant, 5 is perfect match): "
            for eid, text in evidence_candidates
        ]
            
        inputs = self.tokenizer(prompts, return_tensors="pt", padding=True).to(self.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=2,
            temperature=0.1,
            pad_token_id=self.tokenizer.eos_token_id
        )
        scored_evidence = []
        for (eid, text), output in zip(evidence_candidates, outputs):
            output_text = self.tokenizer.decode(output, skip_special_tokens=True)
            # Extract the first digit from the output
            digits = re.findall(r'\d', output_text)
            if digits:
                rating = int(digits[0])
                # Ensure rating is between 1-5
                rating = max(1, min(5, rating))
            else:
                # Default to middle rating if no digit found
                rating = 3
            scored_evidence.append((eid, text, rating))
        scored_evidence.sort(key=lambda x: x[2], reverse=True)
        return scored_evidence[:final_k]

In [14]:
def get_evidence_text(evidences):
    return [(eid, evidenceRetriever.evidence_map[eid]) for eid in evidences]

In [8]:
def initial_retrieval(retriever, test_claims_path):
    with open(test_claims_path) as f:
        test_claims = json.load(f)
    
    output = {}
    for claim_id, claim_data in test_claims.items():
        # Retrieve candidates
        candidates = retriever.retrieve(claim_data['claim_text'], top_k=10)
        retrieved = [eid for eid, _  in candidates[:10]]
        output[claim_id] = {
            "initial_evidences": retrieved,
            "claim_text": claim_data['claim_text']
        }

    # store output
    with open('data/initial_retrieval_output.json', 'w') as f:
        json.dump(output, f, indent=2)
    return output


In [9]:
res = initial_retrieval(evidenceRetriever, test_claims_path='data/dev-claims-baseline.json')

In [10]:
llama_filter = LLaMAFilter()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
def process_claims(llama_filterer, dev_claims_set):
    for claim_id, claim_data in dev_claims_set.items():
        candidates = claim_data['initial_evidences']
        print(candidates)
        filtered = llama_filterer.filter_evidence(claim_data['claim_text'], get_evidence_text(candidates))
        dev_claims_set[claim_id]['filtered_evidences'] = [eid for eid, _, _ in filtered]
    
        # store output
    with open('data/filtered_retrieval_output.json', 'w') as f:
        json.dump(dev_claims_set, f, indent=2)
    return dev_claims_set

res = process_claims(llama_filter,res)
res

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-67732', 'evidence-572512', 'evidence-780332', 'evidence-1061888', 'evidence-452156', 'evidence-685728', 'evidence-48256', 'evidence-1002110', 'evidence-1050894', 'evidence-1075134']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-647121', 'evidence-559290', 'evidence-415619', 'evidence-361694', 'evidence-949910', 'evidence-970402', 'evidence-121187', 'evidence-866495', 'evidence-1075865', 'evidence-419267']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-694262', 'evidence-403673', 'evidence-332770', 'evidence-527805', 'evidence-1084381', 'evidence-842114', 'evidence-112875', 'evidence-1086865', 'evidence-343576', 'evidence-937622']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-472751', 'evidence-745642', 'evidence-856375', 'evidence-641043', 'evidence-101223', 'evidence-771374', 'evidence-15816', 'evidence-818689', 'evidence-553214', 'evidence-888192']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-264761', 'evidence-52981', 'evidence-44751', 'evidence-640229', 'evidence-691825', 'evidence-858437', 'evidence-1200633', 'evidence-1041564', 'evidence-725045', 'evidence-710110']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-94272', 'evidence-866495', 'evidence-1075865', 'evidence-210167', 'evidence-632574', 'evidence-677627', 'evidence-453635', 'evidence-381399', 'evidence-730469', 'evidence-1154637']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-908113', 'evidence-386068', 'evidence-1129386', 'evidence-773994', 'evidence-462912', 'evidence-358086', 'evidence-220190', 'evidence-901197', 'evidence-276290', 'evidence-166324']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-734024', 'evidence-534425', 'evidence-956812', 'evidence-1071200', 'evidence-792207', 'evidence-356178', 'evidence-1119853', 'evidence-322438', 'evidence-430670', 'evidence-164201']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-81628', 'evidence-938028', 'evidence-181656', 'evidence-470476', 'evidence-789848', 'evidence-660972', 'evidence-6787', 'evidence-516721', 'evidence-1001780', 'evidence-1192101']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-270324', 'evidence-91026', 'evidence-1131134', 'evidence-321341', 'evidence-416567', 'evidence-52744', 'evidence-313483', 'evidence-169186', 'evidence-446603', 'evidence-54880']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-599055', 'evidence-604296', 'evidence-256785', 'evidence-485896', 'evidence-1116760', 'evidence-1124385', 'evidence-27377', 'evidence-16169', 'evidence-78265', 'evidence-977792']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-208053', 'evidence-425667', 'evidence-15399', 'evidence-273631', 'evidence-157943', 'evidence-243372', 'evidence-186411', 'evidence-1177233', 'evidence-204526', 'evidence-1198348']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-894521', 'evidence-797505', 'evidence-975870', 'evidence-799984', 'evidence-665226', 'evidence-1088478', 'evidence-29528', 'evidence-118577', 'evidence-171206', 'evidence-59240']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1186875', 'evidence-421870', 'evidence-1073433', 'evidence-817269', 'evidence-539221', 'evidence-1065728', 'evidence-707408', 'evidence-123812', 'evidence-646466', 'evidence-731284']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-526818', 'evidence-158204', 'evidence-223619', 'evidence-779966', 'evidence-78884', 'evidence-848463', 'evidence-1127057', 'evidence-1042857', 'evidence-1008952', 'evidence-700426']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1007525', 'evidence-558580', 'evidence-651900', 'evidence-985452', 'evidence-442589', 'evidence-368192', 'evidence-687835', 'evidence-855992', 'evidence-399231', 'evidence-348895']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1005403', 'evidence-954808', 'evidence-569938', 'evidence-384007', 'evidence-223772', 'evidence-1192872', 'evidence-797505', 'evidence-785015', 'evidence-126945', 'evidence-548766']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-190063', 'evidence-625563', 'evidence-1040875', 'evidence-608418', 'evidence-326733', 'evidence-530201', 'evidence-220020', 'evidence-1160125', 'evidence-106073', 'evidence-121664']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-586254', 'evidence-1107462', 'evidence-111881', 'evidence-731434', 'evidence-39028', 'evidence-114604', 'evidence-957020', 'evidence-954212', 'evidence-286845', 'evidence-256960']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-886611', 'evidence-720171', 'evidence-488270', 'evidence-510347', 'evidence-1079971', 'evidence-259826', 'evidence-36994', 'evidence-517090', 'evidence-64777', 'evidence-222883']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-411817', 'evidence-1020001', 'evidence-783884', 'evidence-374084', 'evidence-964861', 'evidence-123917', 'evidence-586254', 'evidence-392335', 'evidence-756019', 'evidence-306587']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-894734', 'evidence-612560', 'evidence-882011', 'evidence-196041', 'evidence-607697', 'evidence-84979', 'evidence-650615', 'evidence-741149', 'evidence-1112861', 'evidence-320996']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-754329', 'evidence-1174820', 'evidence-75640', 'evidence-918262', 'evidence-69294', 'evidence-487201', 'evidence-178152', 'evidence-19584', 'evidence-835367', 'evidence-738452']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-262028', 'evidence-801082', 'evidence-121664', 'evidence-714864', 'evidence-178206', 'evidence-843608', 'evidence-368192', 'evidence-814004', 'evidence-725977', 'evidence-100461']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1077486', 'evidence-148977', 'evidence-264263', 'evidence-903248', 'evidence-323097', 'evidence-738480', 'evidence-1032278', 'evidence-976802', 'evidence-159348', 'evidence-890015']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-189592', 'evidence-867005', 'evidence-308708', 'evidence-965363', 'evidence-752013', 'evidence-1099922', 'evidence-95782', 'evidence-416399', 'evidence-359101', 'evidence-398463']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1001780', 'evidence-287752', 'evidence-457045', 'evidence-940688', 'evidence-13434', 'evidence-894521', 'evidence-1115033', 'evidence-601209', 'evidence-773685', 'evidence-123812']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-662931', 'evidence-94272', 'evidence-1100614', 'evidence-609211', 'evidence-165820', 'evidence-510034', 'evidence-1205623', 'evidence-382260', 'evidence-620559', 'evidence-468321']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1078744', 'evidence-450619', 'evidence-951599', 'evidence-291872', 'evidence-731175', 'evidence-1031822', 'evidence-29224', 'evidence-269643', 'evidence-504449', 'evidence-902582']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-551522', 'evidence-962456', 'evidence-399231', 'evidence-356854', 'evidence-1018042', 'evidence-545279', 'evidence-247672', 'evidence-772019', 'evidence-512936', 'evidence-630891']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-551522', 'evidence-648764', 'evidence-1018042', 'evidence-356854', 'evidence-247672', 'evidence-1190705', 'evidence-973577', 'evidence-788947', 'evidence-252580', 'evidence-620559']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-898871', 'evidence-311416', 'evidence-933078', 'evidence-161094', 'evidence-352954', 'evidence-594740', 'evidence-281727', 'evidence-385147', 'evidence-1101211', 'evidence-468656']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-701955', 'evidence-41418', 'evidence-435467', 'evidence-1174708', 'evidence-665314', 'evidence-145828', 'evidence-1028610', 'evidence-342078', 'evidence-387450', 'evidence-1082452']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-973064', 'evidence-725436', 'evidence-365357', 'evidence-381950', 'evidence-363372', 'evidence-707408', 'evidence-82292', 'evidence-593737', 'evidence-519752', 'evidence-734872']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-461050', 'evidence-846475', 'evidence-185243', 'evidence-410700', 'evidence-433225', 'evidence-301035', 'evidence-260744', 'evidence-734872', 'evidence-89411', 'evidence-529426']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-561041', 'evidence-342343', 'evidence-679387', 'evidence-708859', 'evidence-984836', 'evidence-986334', 'evidence-1150743', 'evidence-238974', 'evidence-190384', 'evidence-1084082']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1108581', 'evidence-627319', 'evidence-431693', 'evidence-811585', 'evidence-497518', 'evidence-1094759', 'evidence-958600', 'evidence-514779', 'evidence-1049692', 'evidence-192836']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-792877', 'evidence-947243', 'evidence-1124123', 'evidence-1198644', 'evidence-447955', 'evidence-77016', 'evidence-39028', 'evidence-399454', 'evidence-977735', 'evidence-118577']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-621880', 'evidence-968233', 'evidence-834978', 'evidence-794227', 'evidence-424893', 'evidence-920160', 'evidence-913626', 'evidence-902107', 'evidence-470418', 'evidence-6590']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1161423', 'evidence-1184683', 'evidence-502356', 'evidence-731175', 'evidence-836972', 'evidence-173323', 'evidence-308405', 'evidence-309746', 'evidence-976285', 'evidence-247680']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-664933', 'evidence-1166728', 'evidence-1131834', 'evidence-451174', 'evidence-445131', 'evidence-118189', 'evidence-1056104', 'evidence-366178', 'evidence-1006823', 'evidence-657969']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-698828', 'evidence-119750', 'evidence-1134338', 'evidence-529985', 'evidence-386943', 'evidence-1089063', 'evidence-433225', 'evidence-1199499', 'evidence-1041175', 'evidence-630043']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-949910', 'evidence-1182416', 'evidence-559290', 'evidence-51588', 'evidence-35538', 'evidence-434720', 'evidence-415619', 'evidence-970402', 'evidence-333939', 'evidence-855820']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-935858', 'evidence-407388', 'evidence-21424', 'evidence-360132', 'evidence-244062', 'evidence-148398', 'evidence-197205', 'evidence-252180', 'evidence-635501', 'evidence-1177428']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1119540', 'evidence-307837', 'evidence-164284', 'evidence-258341', 'evidence-1204847', 'evidence-280371', 'evidence-111518', 'evidence-898255', 'evidence-1156031', 'evidence-387291']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1174708', 'evidence-598168', 'evidence-435792', 'evidence-378444', 'evidence-1082452', 'evidence-1014342', 'evidence-1085055', 'evidence-1052408', 'evidence-184934', 'evidence-1106482']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1088556', 'evidence-820847', 'evidence-729613', 'evidence-1170', 'evidence-490170', 'evidence-64048', 'evidence-268048', 'evidence-319975', 'evidence-325821', 'evidence-219865']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-629358', 'evidence-338219', 'evidence-87345', 'evidence-251634', 'evidence-809426', 'evidence-1195163', 'evidence-423643', 'evidence-914228', 'evidence-588053', 'evidence-282556']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-269919', 'evidence-605550', 'evidence-299025', 'evidence-452577', 'evidence-423643', 'evidence-251138', 'evidence-1016625', 'evidence-726166', 'evidence-143084', 'evidence-1055692']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-645135', 'evidence-1075943', 'evidence-761183', 'evidence-690553', 'evidence-215395', 'evidence-154583', 'evidence-303695', 'evidence-318323', 'evidence-464935', 'evidence-1106741']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-844909', 'evidence-363372', 'evidence-407096', 'evidence-315033', 'evidence-464993', 'evidence-378394', 'evidence-287752', 'evidence-939493', 'evidence-879915', 'evidence-1155571']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-85031', 'evidence-539221', 'evidence-263772', 'evidence-185162', 'evidence-1066292', 'evidence-879500', 'evidence-55543', 'evidence-549640', 'evidence-832658', 'evidence-519752']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-459315', 'evidence-384334', 'evidence-427299', 'evidence-181451', 'evidence-921243', 'evidence-1109126', 'evidence-321505', 'evidence-459011', 'evidence-549640', 'evidence-403688']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-356854', 'evidence-620559', 'evidence-255445', 'evidence-938381', 'evidence-1105988', 'evidence-399231', 'evidence-410087', 'evidence-962456', 'evidence-648764', 'evidence-801956']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-105536', 'evidence-604477', 'evidence-110432', 'evidence-972696', 'evidence-123812', 'evidence-1187712', 'evidence-211912', 'evidence-8304', 'evidence-894521', 'evidence-1156443']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-956029', 'evidence-1017927', 'evidence-866245', 'evidence-738340', 'evidence-776091', 'evidence-1070837', 'evidence-432695', 'evidence-1087946', 'evidence-357997', 'evidence-1143730']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1068590', 'evidence-114562', 'evidence-1113083', 'evidence-455299', 'evidence-96686', 'evidence-1020939', 'evidence-1055320', 'evidence-1080833', 'evidence-175285', 'evidence-1125202']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-769659', 'evidence-392', 'evidence-69025', 'evidence-618304', 'evidence-253453', 'evidence-847727', 'evidence-20779', 'evidence-380716', 'evidence-1119610', 'evidence-480659']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1053558', 'evidence-553516', 'evidence-330124', 'evidence-223772', 'evidence-479438', 'evidence-271202', 'evidence-237614', 'evidence-393887', 'evidence-12424', 'evidence-967332']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-616168', 'evidence-298723', 'evidence-361694', 'evidence-866495', 'evidence-1075865', 'evidence-108394', 'evidence-94272', 'evidence-202388', 'evidence-210167', 'evidence-615618']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-755175', 'evidence-435467', 'evidence-262827', 'evidence-47605', 'evidence-1007486', 'evidence-1027012', 'evidence-675527', 'evidence-998010', 'evidence-948375', 'evidence-936703']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-824781', 'evidence-992780', 'evidence-280390', 'evidence-521885', 'evidence-185243', 'evidence-10368', 'evidence-1111410', 'evidence-888023', 'evidence-925231', 'evidence-889101']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-714675', 'evidence-1013740', 'evidence-844486', 'evidence-34759', 'evidence-870141', 'evidence-835623', 'evidence-1070085', 'evidence-897934', 'evidence-389165', 'evidence-141739']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-332770', 'evidence-33543', 'evidence-112875', 'evidence-972029', 'evidence-298630', 'evidence-712740', 'evidence-974202', 'evidence-610739', 'evidence-820057', 'evidence-865226']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-728645', 'evidence-116639', 'evidence-303036', 'evidence-1134336', 'evidence-68501', 'evidence-106742', 'evidence-1068851', 'evidence-43818', 'evidence-825580', 'evidence-815913']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-356854', 'evidence-204526', 'evidence-662931', 'evidence-843608', 'evidence-1205623', 'evidence-196937', 'evidence-208053', 'evidence-342507', 'evidence-1177233', 'evidence-620559']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-653981', 'evidence-408089', 'evidence-123812', 'evidence-121500', 'evidence-972696', 'evidence-169769', 'evidence-845138', 'evidence-240414', 'evidence-1154195', 'evidence-1088298']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-136867', 'evidence-193559', 'evidence-536054', 'evidence-152062', 'evidence-1176648', 'evidence-1120355', 'evidence-175169', 'evidence-121331', 'evidence-960249', 'evidence-67095']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-879500', 'evidence-604477', 'evidence-1055682', 'evidence-759337', 'evidence-123812', 'evidence-1187712', 'evidence-136165', 'evidence-760798', 'evidence-202388', 'evidence-211912']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-878263', 'evidence-759479', 'evidence-234076', 'evidence-809397', 'evidence-784559', 'evidence-452511', 'evidence-130992', 'evidence-177293', 'evidence-888963', 'evidence-404422']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-124062', 'evidence-363528', 'evidence-521885', 'evidence-286819', 'evidence-856239', 'evidence-834766', 'evidence-280904', 'evidence-1173053', 'evidence-514545', 'evidence-906794']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-224185', 'evidence-464939', 'evidence-736392', 'evidence-223996', 'evidence-776385', 'evidence-573616', 'evidence-345615', 'evidence-109563', 'evidence-1016696', 'evidence-20963']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1018575', 'evidence-1043526', 'evidence-1018829', 'evidence-500249', 'evidence-791159', 'evidence-515817', 'evidence-808749', 'evidence-1065162', 'evidence-878835', 'evidence-381399']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-178505', 'evidence-980959', 'evidence-368241', 'evidence-762715', 'evidence-19909', 'evidence-677910', 'evidence-1036643', 'evidence-216542', 'evidence-322520', 'evidence-328337']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-248009', 'evidence-1171424', 'evidence-1089698', 'evidence-534239', 'evidence-123016', 'evidence-801005', 'evidence-555406', 'evidence-658151', 'evidence-952310', 'evidence-855317']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1013521', 'evidence-902524', 'evidence-1119853', 'evidence-879915', 'evidence-512115', 'evidence-508793', 'evidence-198465', 'evidence-831754', 'evidence-180952', 'evidence-1156443']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-580567', 'evidence-369389', 'evidence-386380', 'evidence-513559', 'evidence-1133657', 'evidence-217138', 'evidence-1048388', 'evidence-888119', 'evidence-705942', 'evidence-509648']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-817269', 'evidence-593737', 'evidence-370435', 'evidence-1066292', 'evidence-55543', 'evidence-100145', 'evidence-1186875', 'evidence-574477', 'evidence-881508', 'evidence-707408']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-354829', 'evidence-114596', 'evidence-1150148', 'evidence-378444', 'evidence-1085447', 'evidence-734286', 'evidence-968233', 'evidence-920160', 'evidence-482695', 'evidence-660755']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-652884', 'evidence-983918', 'evidence-87345', 'evidence-130492', 'evidence-548492', 'evidence-625824', 'evidence-90274', 'evidence-800827', 'evidence-509654', 'evidence-247680']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-313762', 'evidence-346688', 'evidence-790775', 'evidence-1141045', 'evidence-756180', 'evidence-1015173', 'evidence-362073', 'evidence-273109', 'evidence-1101800', 'evidence-81519']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-550445', 'evidence-1192268', 'evidence-957349', 'evidence-894100', 'evidence-594019', 'evidence-345042', 'evidence-358086', 'evidence-892616', 'evidence-773994', 'evidence-938487']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-588787', 'evidence-807676', 'evidence-1110867', 'evidence-29430', 'evidence-710110', 'evidence-771238', 'evidence-29963', 'evidence-106742', 'evidence-815913', 'evidence-1184933']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-596172', 'evidence-631056', 'evidence-447813', 'evidence-1127600', 'evidence-866998', 'evidence-291327', 'evidence-672311', 'evidence-104121', 'evidence-1059528', 'evidence-1197437']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-851031', 'evidence-1040612', 'evidence-173281', 'evidence-1056186', 'evidence-1022509', 'evidence-1066768', 'evidence-828120', 'evidence-739706', 'evidence-808139', 'evidence-7473']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-775820', 'evidence-217521', 'evidence-616564', 'evidence-461050', 'evidence-810653', 'evidence-1192029', 'evidence-270365', 'evidence-1074778', 'evidence-683138', 'evidence-605242']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1206449', 'evidence-695660', 'evidence-22230', 'evidence-443756', 'evidence-1186458', 'evidence-931390', 'evidence-419114', 'evidence-719544', 'evidence-970402', 'evidence-229230']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-930563', 'evidence-663642', 'evidence-900586', 'evidence-575649', 'evidence-1175280', 'evidence-175657', 'evidence-754191', 'evidence-369389', 'evidence-386380', 'evidence-1001116']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-602026', 'evidence-12011', 'evidence-318516', 'evidence-652497', 'evidence-1020632', 'evidence-995233', 'evidence-793682', 'evidence-507225', 'evidence-285150', 'evidence-519727']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-805409', 'evidence-933078', 'evidence-13160', 'evidence-402137', 'evidence-898871', 'evidence-311416', 'evidence-633190', 'evidence-370378', 'evidence-122441', 'evidence-370468']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-856106', 'evidence-647121', 'evidence-1047395', 'evidence-282799', 'evidence-1007748', 'evidence-1092520', 'evidence-622781', 'evidence-269919', 'evidence-456747', 'evidence-367916']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-96686', 'evidence-624715', 'evidence-1207200', 'evidence-389165', 'evidence-741671', 'evidence-210566', 'evidence-1142444', 'evidence-1086865', 'evidence-554535', 'evidence-1159499']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-784155', 'evidence-51845', 'evidence-65132', 'evidence-378207', 'evidence-1119357', 'evidence-1165605', 'evidence-388010', 'evidence-307017', 'evidence-951078', 'evidence-1025353']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-423215', 'evidence-574489', 'evidence-101963', 'evidence-380361', 'evidence-590108', 'evidence-1205212', 'evidence-495048', 'evidence-954557', 'evidence-562327', 'evidence-130761']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-208863', 'evidence-269791', 'evidence-1182056', 'evidence-1155515', 'evidence-1178290', 'evidence-1003105', 'evidence-1004849', 'evidence-126279', 'evidence-901219', 'evidence-1202569']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-100394', 'evidence-510034', 'evidence-1006052', 'evidence-774592', 'evidence-100171', 'evidence-247680', 'evidence-491362', 'evidence-1047887', 'evidence-220020', 'evidence-551522']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1071718', 'evidence-54776', 'evidence-124851', 'evidence-868516', 'evidence-1113929', 'evidence-1179095', 'evidence-348360', 'evidence-358167', 'evidence-541689', 'evidence-620739']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1010285', 'evidence-1138009', 'evidence-33576', 'evidence-755597', 'evidence-1194346', 'evidence-114465', 'evidence-886863', 'evidence-900843', 'evidence-8793', 'evidence-324576']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-808385', 'evidence-453635', 'evidence-224855', 'evidence-94272', 'evidence-220020', 'evidence-312751', 'evidence-16975', 'evidence-699212', 'evidence-1040875', 'evidence-845688']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-905992', 'evidence-1068819', 'evidence-233245', 'evidence-1073602', 'evidence-229579', 'evidence-480631', 'evidence-63203', 'evidence-740744', 'evidence-152669', 'evidence-899905']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-842114', 'evidence-954212', 'evidence-410700', 'evidence-977735', 'evidence-1016625', 'evidence-8063', 'evidence-196453', 'evidence-608904', 'evidence-289045', 'evidence-418973']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-851031', 'evidence-59240', 'evidence-248455', 'evidence-599281', 'evidence-1056186', 'evidence-264761', 'evidence-119173', 'evidence-906956', 'evidence-253690', 'evidence-391045']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-538528', 'evidence-739706', 'evidence-683138', 'evidence-851031', 'evidence-591257', 'evidence-771238', 'evidence-253690', 'evidence-248455', 'evidence-399273', 'evidence-849471']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-544253', 'evidence-575135', 'evidence-337194', 'evidence-134061', 'evidence-614890', 'evidence-736986', 'evidence-770760', 'evidence-617222', 'evidence-642670', 'evidence-1109658']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-626410', 'evidence-109778', 'evidence-1056470', 'evidence-383498', 'evidence-1025757', 'evidence-926240', 'evidence-13434', 'evidence-971000', 'evidence-948853', 'evidence-19886']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1088298', 'evidence-1121998', 'evidence-508668', 'evidence-636423', 'evidence-584943', 'evidence-74778', 'evidence-121500', 'evidence-464979', 'evidence-988277', 'evidence-98842']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-533511', 'evidence-59188', 'evidence-377592', 'evidence-865726', 'evidence-895823', 'evidence-315033', 'evidence-506460', 'evidence-695091', 'evidence-332148', 'evidence-300156']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-976285', 'evidence-1161423', 'evidence-493616', 'evidence-94272', 'evidence-220020', 'evidence-1064139', 'evidence-247680', 'evidence-383274', 'evidence-584709', 'evidence-755175']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-654610', 'evidence-1174117', 'evidence-338219', 'evidence-809426', 'evidence-232927', 'evidence-635378', 'evidence-84454', 'evidence-98115', 'evidence-1092185', 'evidence-972570']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1117137', 'evidence-268745', 'evidence-396628', 'evidence-521030', 'evidence-724010', 'evidence-508420', 'evidence-655717', 'evidence-1074729', 'evidence-84454', 'evidence-738480']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-948375', 'evidence-794227', 'evidence-968233', 'evidence-594005', 'evidence-834978', 'evidence-424893', 'evidence-988909', 'evidence-153898', 'evidence-438440', 'evidence-913626']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1047048', 'evidence-22048', 'evidence-473200', 'evidence-574477', 'evidence-946262', 'evidence-942611', 'evidence-1075856', 'evidence-109104', 'evidence-76224', 'evidence-176074']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1184683', 'evidence-247680', 'evidence-662931', 'evidence-376130', 'evidence-616168', 'evidence-510034', 'evidence-716825', 'evidence-453635', 'evidence-98914', 'evidence-805335']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-450619', 'evidence-627472', 'evidence-938381', 'evidence-1051708', 'evidence-410087', 'evidence-801956', 'evidence-551522', 'evidence-890534', 'evidence-279492', 'evidence-718195']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-61272', 'evidence-8063', 'evidence-657950', 'evidence-936703', 'evidence-1082770', 'evidence-332148', 'evidence-858513', 'evidence-1091550', 'evidence-1066292', 'evidence-831116']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1062917', 'evidence-139165', 'evidence-768188', 'evidence-651795', 'evidence-438628', 'evidence-924788', 'evidence-860966', 'evidence-470508', 'evidence-442589', 'evidence-333326']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1183711', 'evidence-265967', 'evidence-815795', 'evidence-95039', 'evidence-11688', 'evidence-1034628', 'evidence-631872', 'evidence-810957', 'evidence-287127', 'evidence-108706']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-488727', 'evidence-1102763', 'evidence-257475', 'evidence-1158606', 'evidence-956954', 'evidence-973268', 'evidence-620559', 'evidence-355487', 'evidence-721883', 'evidence-1132320']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-889119', 'evidence-1190705', 'evidence-29224', 'evidence-187502', 'evidence-454312', 'evidence-51664', 'evidence-539925', 'evidence-617501', 'evidence-410087', 'evidence-518358']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-968233', 'evidence-584152', 'evidence-1198065', 'evidence-834757', 'evidence-794227', 'evidence-1150148', 'evidence-185180', 'evidence-1054845', 'evidence-990313', 'evidence-366668']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-29963', 'evidence-559069', 'evidence-1034941', 'evidence-589546', 'evidence-471916', 'evidence-53596', 'evidence-280204', 'evidence-254811', 'evidence-247197', 'evidence-298628']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-133938', 'evidence-718136', 'evidence-963412', 'evidence-631053', 'evidence-549539', 'evidence-1119729', 'evidence-81396', 'evidence-128689', 'evidence-150709', 'evidence-118140']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1099844', 'evidence-769821', 'evidence-962207', 'evidence-1067341', 'evidence-845541', 'evidence-529121', 'evidence-421276', 'evidence-779263', 'evidence-1069909', 'evidence-1020805']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-127999', 'evidence-1175347', 'evidence-285600', 'evidence-122504', 'evidence-684895', 'evidence-364924', 'evidence-1205331', 'evidence-848463', 'evidence-779966', 'evidence-700426']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-908316', 'evidence-479543', 'evidence-885836', 'evidence-346902', 'evidence-1047356', 'evidence-540478', 'evidence-315148', 'evidence-875371', 'evidence-979869', 'evidence-250931']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-446509', 'evidence-965161', 'evidence-1192617', 'evidence-636423', 'evidence-29077', 'evidence-75509', 'evidence-336860', 'evidence-171000', 'evidence-400437', 'evidence-82325']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-363958', 'evidence-415728', 'evidence-624594', 'evidence-628361', 'evidence-167011', 'evidence-23689', 'evidence-757938', 'evidence-548492', 'evidence-690855', 'evidence-163066']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-503460', 'evidence-1094467', 'evidence-1200842', 'evidence-489930', 'evidence-1007867', 'evidence-793124', 'evidence-814721', 'evidence-98115', 'evidence-51798', 'evidence-773904']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-575649', 'evidence-810126', 'evidence-1124018', 'evidence-19119', 'evidence-92119', 'evidence-255391', 'evidence-614574', 'evidence-608016', 'evidence-312865', 'evidence-127007']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1174955', 'evidence-1142444', 'evidence-657030', 'evidence-412952', 'evidence-264263', 'evidence-794919', 'evidence-416007', 'evidence-724010', 'evidence-958457', 'evidence-741089']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-27495', 'evidence-875252', 'evidence-124035', 'evidence-166605', 'evidence-1084296', 'evidence-995233', 'evidence-749407', 'evidence-833614', 'evidence-900868', 'evidence-793682']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-529248', 'evidence-18368', 'evidence-116639', 'evidence-175787', 'evidence-780659', 'evidence-743206', 'evidence-179554', 'evidence-212614', 'evidence-1077625', 'evidence-427121']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1049371', 'evidence-158204', 'evidence-616168', 'evidence-210167', 'evidence-223619', 'evidence-16169', 'evidence-950248', 'evidence-599055', 'evidence-956940', 'evidence-1111236']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-516402', 'evidence-693168', 'evidence-888963', 'evidence-404422', 'evidence-784559', 'evidence-792604', 'evidence-930414', 'evidence-774479', 'evidence-912662', 'evidence-391155']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-755597', 'evidence-725565', 'evidence-844144', 'evidence-803929', 'evidence-8793', 'evidence-894746', 'evidence-33576', 'evidence-71288', 'evidence-368473', 'evidence-575380']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-794055', 'evidence-227055', 'evidence-651403', 'evidence-864143', 'evidence-142656', 'evidence-341739', 'evidence-1061949', 'evidence-313955', 'evidence-206495', 'evidence-101509']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-725428', 'evidence-1068834', 'evidence-942779', 'evidence-1195607', 'evidence-945977', 'evidence-279393', 'evidence-947076', 'evidence-409754', 'evidence-5928', 'evidence-169104']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-429157', 'evidence-812408', 'evidence-371380', 'evidence-211360', 'evidence-81511', 'evidence-866265', 'evidence-1094979', 'evidence-431957', 'evidence-1160686', 'evidence-175598']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-990320', 'evidence-323541', 'evidence-937356', 'evidence-1075985', 'evidence-96441', 'evidence-617271', 'evidence-631826', 'evidence-633996', 'evidence-664539', 'evidence-221658']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-208863', 'evidence-269791', 'evidence-1155515', 'evidence-390216', 'evidence-1029618', 'evidence-207443', 'evidence-1178290', 'evidence-1115434', 'evidence-37274', 'evidence-1166955']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-318997', 'evidence-914993', 'evidence-299069', 'evidence-445359', 'evidence-298396', 'evidence-1098364', 'evidence-1086216', 'evidence-195729', 'evidence-213716', 'evidence-1200076']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-35625', 'evidence-1004521', 'evidence-90274', 'evidence-588917', 'evidence-469998', 'evidence-846475', 'evidence-149374', 'evidence-760798', 'evidence-1003462', 'evidence-691304']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-354016', 'evidence-763336', 'evidence-693024', 'evidence-563485', 'evidence-936836', 'evidence-1060245', 'evidence-285002', 'evidence-915272', 'evidence-358594', 'evidence-68566']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1190716', 'evidence-1074121', 'evidence-885209', 'evidence-553459', 'evidence-289779', 'evidence-481333', 'evidence-1162415', 'evidence-119592', 'evidence-1087907', 'evidence-230286']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-572512', 'evidence-48256', 'evidence-67732', 'evidence-622374', 'evidence-1075134', 'evidence-1050894', 'evidence-32901', 'evidence-780332', 'evidence-866329', 'evidence-1070419']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-986681', 'evidence-274044', 'evidence-481206', 'evidence-80853', 'evidence-844426', 'evidence-179283', 'evidence-548137', 'evidence-706949', 'evidence-991415', 'evidence-108078']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-578925', 'evidence-1105120', 'evidence-862997', 'evidence-726838', 'evidence-565971', 'evidence-143437', 'evidence-823621', 'evidence-602665', 'evidence-840950', 'evidence-133419']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-453529', 'evidence-714675', 'evidence-260744', 'evidence-175965', 'evidence-667703', 'evidence-930564', 'evidence-953997', 'evidence-1021635', 'evidence-185243', 'evidence-13780']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-941768', 'evidence-864757', 'evidence-44389', 'evidence-300085', 'evidence-75631', 'evidence-7561', 'evidence-185180', 'evidence-1046773', 'evidence-1016669', 'evidence-1007486']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-55991', 'evidence-510034', 'evidence-808385', 'evidence-220020', 'evidence-453635', 'evidence-889119', 'evidence-662931', 'evidence-410087', 'evidence-508812', 'evidence-94272']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-613135', 'evidence-399231', 'evidence-657496', 'evidence-1184683', 'evidence-648764', 'evidence-498380', 'evidence-625563', 'evidence-203533', 'evidence-504449', 'evidence-874346']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-580567', 'evidence-369389', 'evidence-1001116', 'evidence-386380', 'evidence-1048388', 'evidence-744921', 'evidence-257034', 'evidence-705942', 'evidence-509648', 'evidence-97587']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-526112', 'evidence-366949', 'evidence-510347', 'evidence-488270', 'evidence-475230', 'evidence-980959', 'evidence-107116', 'evidence-529355', 'evidence-526003', 'evidence-1056020']


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


['evidence-1175280', 'evidence-1050619', 'evidence-754191', 'evidence-930563', 'evidence-748548', 'evidence-326643', 'evidence-608016', 'evidence-1196738', 'evidence-877068', 'evidence-900586']


{'claim-752': {'initial_evidences': ['evidence-67732',
   'evidence-572512',
   'evidence-780332',
   'evidence-1061888',
   'evidence-452156',
   'evidence-685728',
   'evidence-48256',
   'evidence-1002110',
   'evidence-1050894',
   'evidence-1075134'],
  'claim_text': '[South Australia] has the most expensive electricity in the world.',
  'filtered_evidences': ['evidence-1075134',
   'evidence-67732',
   'evidence-572512',
   'evidence-780332',
   'evidence-1061888']},
 'claim-375': {'initial_evidences': ['evidence-647121',
   'evidence-559290',
   'evidence-415619',
   'evidence-361694',
   'evidence-949910',
   'evidence-970402',
   'evidence-121187',
   'evidence-866495',
   'evidence-1075865',
   'evidence-419267'],
  'claim_text': 'when 3 per cent of total annual global emissions of carbon dioxide are from humans and Australia prod\xaduces 1.3 per cent of this 3 per cent, then no amount of emissions reductio\xadn here will have any effect on global climate.',
  'filtered_evide

In [None]:
class HybridRetriever:
    def __init__(self, retriever: EvidenceClaimRetriever, llama_filter: LLaMAFilter):
        self.retriever = retriever
        self.filter = llama_filter

    def load_data(self, claims_path, evidence_path):
        """Load data for retriever"""
        self.retriever.load_data(claims_path, evidence_path)

    def load_dev_data(self, dev_claims_path):
        """Load dev data for retriever"""
        self.retriever.load_dev_data(dev_claims_path)

    # def train(self, output_dir='./trained_model'):
    #     """Train the retriever"""
    #     self.retriever.train(output_dir)

    def index_evidence(self, save_path='data/evidence_embeddings.pth'):
        """Index evidence for retriever"""
        self.retriever.index_evidence(save_path)
    def get_evidence_text(self, evidences):
        eids = [eid for eid, _ in evidences]
        return [(eid, self.retriever.evidence_map[eid]) for eid in eids]
    def retrieve_and_filter(self, claim_text, top_k_initial=10, top_k_final=5):
        """Retrieve top 10 with sentence-transformers, filter to top 5 with LLaMA"""
        initial_results = self.retriever.retrieve(claim_text, top_k=top_k_initial)
        final_results = self.filter.filter_evidence(claim_text, get_evidence_text(initial_results), final_k=top_k_final)
        return final_results

    def evaluate(self, claims_path, top_k_initial=10, top_k_final=5):
        """Evaluate the hybrid system"""
        with open(claims_path) as f:
            claims = json.load(f)

        recall_initial = 0
        recall_final = 0
        precision_final = 0
        mrr_final = 0
        runtimes = []
        memory_usages = []

        import time
        import psutil
        process = psutil.Process(os.getpid())

        for claim_id, claim_data in claims.items():
            claim_text = claim_data['claim_text']
            relevant = set(claim_data['evidences'])

            # Measure runtime and memory
            start_time = time.time()
            results = self.retrieve_and_filter(claim_text, top_k_initial, top_k_final)
            runtime = time.time() - start_time
            runtimes.append(runtime)
            memory_usages.append(process.memory_info().rss / 1024**2)  # MB

            # Get retrieved evidence IDs
            retrieved_final = {eid for eid, _, _ in results}
            retrieved_initial = {eid for eid, _ in self.retriever.retrieve(claim_text, top_k_initial)}

            # Recall for initial retriever
            recall_initial += len(relevant & retrieved_initial) / len(relevant) if relevant else 0

            # Recall for final hybrid
            recall_final += len(relevant & retrieved_final) / len(relevant) if relevant else 0

            # Precision for final hybrid
            precision_final += len(relevant & retrieved_final) / top_k_final if relevant else 0

            # MRR for final hybrid
            for rank, (eid, _, _) in enumerate(results, 1):
                if eid in relevant:
                    mrr_final += 1 / rank
                    break
            else:
                mrr_final += 0

        n = len(claims)
        return {
            "Recall@10 (Initial)": recall_initial / n,
            "Recall@5 (Final)": recall_final / n,
            "Precision@5 (Final)": precision_final / n,
            "MRR@5 (Final)": mrr_final / n,
            "Avg Runtime (s)": sum(runtimes) / n,
            "Avg Memory (MB)": sum(memory_usages) / n
        }