# 2025 COMP90042 Project

# 1.Dataset Processing

# 2. Model Implementation

In [1]:
import json
import os
import random
import torch
from datasets import Dataset
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation

class EvidenceClaimRetriever:
    def __init__(self, model_name='all-mpnet-base-v2', batch_size=16, num_epochs=3):
        self.model = SentenceTransformer(model_name)
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.evidence_map = {}
        self.label_mapping = {
            'SUPPORTS': 1,
            'REFUTES': 1,
            'DISPUTED': 1,
            'NOT_ENOUGH_INFO': 0
        }

    def load_data(self, claims_path, evidence_path):
        """Load training data and evidence corpus"""
        # Load evidence corpus
        with open(evidence_path) as f:
            self.evidence_map = json.load(f)

        # Load training claims
        with open(claims_path) as f:
            train_claims = json.load(f)

        # Create training examples
        self.train_examples = []
        for claim_id, claim_data in train_claims.items():
            self._process_claim(claim_data)

    def _process_claim(self, claim_data):
        """Create training examples for a single claim"""
        claim_text = claim_data['claim_text']
        evidence_ids = claim_data['evidences']

        # Add positive examples
        for eid in evidence_ids:
            if eid in self.evidence_map:
                self.train_examples.append(InputExample(
                    texts=[claim_text, self.evidence_map[eid]],
                    label=self.label_mapping[claim_data['claim_label']]
                ))

        # Add negative examples
        self._add_negative_examples(claim_text, evidence_ids, claim_data['claim_label'])

    def _add_negative_examples(self, claim_text, positive_ids, label):
        """Add negative examples with proper ratio"""
        all_evidence_ids = list(self.evidence_map.keys())
        negative_candidates = list(set(all_evidence_ids) - set(positive_ids))

        # Determine number of negatives based on label
        if label == 'NOT_ENOUGH_INFO':
            num_negatives = min(2, len(negative_candidates))
        else:
            num_negatives = min(3 * len(positive_ids), len(negative_candidates))

        # Add selected negatives
        for eid in random.sample(negative_candidates, num_negatives):
            self.train_examples.append(InputExample(
                texts=[claim_text, self.evidence_map[eid]],
                label=0
            ))

    def load_dev_data(self, dev_claims_path):
        """Load separate dev set for validation"""
        with open(dev_claims_path) as f:
            dev_claims = json.load(f)

        # Prepare for InformationRetrievalEvaluator
        self.dev_queries = {}
        self.relevant_docs = {}

        for claim_id, claim_data in dev_claims.items():
            claim_text = claim_data['claim_text']
            evidence_ids = claim_data['evidences']

            self.dev_queries[claim_id] = claim_text
            self.relevant_docs[claim_id] = set(evidence_ids)

    def train(self, output_dir='./trained_model'):
        """Train using dedicated dev set for validation"""
        # Create evaluator using separate dev set
        dev_evaluator = evaluation.InformationRetrievalEvaluator(
            queries=self.dev_queries,
            corpus=self.evidence_map,
            relevant_docs=self.relevant_docs,
            show_progress_bar=True
        )

        # Create DataLoader directly from InputExamples
        train_dataloader = DataLoader(
            self.train_examples,  # Use the list of InputExamples directly
            shuffle=True,
            batch_size=self.batch_size
        )

        # Use MultipleNegativesRankingLoss
        train_loss = losses.MultipleNegativesRankingLoss(self.model)

        # Training configuration
        self.model.fit(
            train_objectives=[(train_dataloader, train_loss)],
            evaluator=dev_evaluator,
            epochs=self.num_epochs,
            evaluation_steps=100,
            warmup_steps=int(len(self.train_examples) * 0.1),
            output_path=output_dir,
            show_progress_bar=True
        )
        
    def retrieve(self, claim_text, top_k=5):
        """Retrieve evidence for a claim"""
        claim_embed = self.model.encode(claim_text)
        scores = torch.nn.functional.cosine_similarity(
            torch.tensor(claim_embed),
            torch.tensor(self.evidence_embeddings)
        )
        top_indices = torch.topk(scores, k=top_k).indices.tolist()
        return [(self.evidence_ids[i], scores[i].item()) for i in top_indices]

    def index_evidence(self, save_path='data/evidence_embeddings.pth'):
        """Precompute all evidence embeddings and optionally save them"""
        self.evidence_ids = list(self.evidence_map.keys())
        self.evidence_texts = [self.evidence_map[eid] for eid in self.evidence_ids]

        # Check if embeddings already exist
        if save_path and os.path.exists(save_path):
            print("Loading precomputed evidence embeddings...")
            data = torch.load(save_path, weights_only=False)
            self.evidence_embeddings = data['embeddings']
            self.evidence_ids = data['ids']
        else:
            print("Computing evidence embeddings...")
            self.evidence_embeddings = self.model.encode(
                self.evidence_texts,
                show_progress_bar=True,
                batch_size=32
            )
            # Save embeddings if save_path is provided
            if save_path:
                print("Saving evidence embeddings...")
                torch.save({'embeddings': self.evidence_embeddings, 'ids': self.evidence_ids}, save_path)


    

In [None]:
# evidenceRetriever = EvidenceClaimRetriever(
#         model_name='all-MiniLM-L6-v2',
#         batch_size=32,
#         num_epochs=5
#     )


evidenceRetriever = EvidenceClaimRetriever(
        model_name='./climate_retriever')


In [3]:

# 1. Load training data and evidence
evidenceRetriever.load_data(
    claims_path='data/train-claims.json',
    evidence_path='data/evidence.json'
)


In [4]:

# 2. Load separate dev set
evidenceRetriever.load_dev_data('data/dev-claims-baseline.json')

In [5]:

# 3. Precompute evidence embeddings
evidenceRetriever.index_evidence()

Loading precomputed evidence embeddings...


In [6]:


# 4. Train with dev set validation
# evidenceRetriever.train(output_dir='./climate_retriever')


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]



Step,Training Loss,Validation Loss,Cosine Accuracy@1,Cosine Accuracy@3,Cosine Accuracy@5,Cosine Accuracy@10,Cosine Precision@1,Cosine Precision@3,Cosine Precision@5,Cosine Precision@10,Cosine Recall@1,Cosine Recall@3,Cosine Recall@5,Cosine Recall@10,Cosine Ndcg@10,Cosine Mrr@10,Cosine Map@100
100,No log,No log,0.12987,0.220779,0.24026,0.318182,0.12987,0.084416,0.061039,0.044156,0.021645,0.042208,0.050866,0.073593,0.081314,0.185913,0.048753
200,No log,No log,0.116883,0.162338,0.214286,0.272727,0.116883,0.064935,0.054545,0.037662,0.019481,0.032468,0.045455,0.062771,0.068896,0.155878,0.039807
300,No log,No log,0.116883,0.188312,0.201299,0.279221,0.116883,0.075758,0.050649,0.037013,0.019481,0.037879,0.042208,0.061688,0.069796,0.161474,0.040461
359,No log,No log,0.123377,0.181818,0.220779,0.279221,0.123377,0.073593,0.054545,0.036364,0.020563,0.036797,0.045455,0.060606,0.070229,0.165961,0.041159
400,No log,No log,0.123377,0.188312,0.227273,0.292208,0.123377,0.075758,0.054545,0.040909,0.020563,0.037879,0.045455,0.068182,0.075287,0.170127,0.042715
500,3.761600,No log,0.11039,0.201299,0.246753,0.318182,0.11039,0.080087,0.062338,0.041558,0.018398,0.040043,0.051948,0.069264,0.075295,0.169125,0.042775
600,3.761600,No log,0.116883,0.181818,0.253247,0.337662,0.116883,0.075758,0.064935,0.046104,0.019481,0.037879,0.054113,0.07684,0.080029,0.171228,0.04497
700,3.761600,No log,0.123377,0.201299,0.266234,0.344156,0.123377,0.080087,0.067532,0.049351,0.020563,0.040043,0.056277,0.082251,0.084536,0.1787,0.047683
718,3.761600,No log,0.116883,0.181818,0.253247,0.337662,0.116883,0.073593,0.064935,0.048052,0.019481,0.036797,0.054113,0.080087,0.082346,0.173444,0.047236
800,3.761600,No log,0.11039,0.188312,0.266234,0.344156,0.11039,0.080087,0.07013,0.048052,0.018398,0.040043,0.058442,0.080087,0.082196,0.170637,0.048091


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [01:06<26:38, 66.59s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [01:29<15:39, 40.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:52<11:56, 32.56s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [02:13<09:50, 28.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [02:34<08:32, 25.61s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:55<07:36, 24.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [03:16<06:53, 22.97s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [03:37<06:21, 22.44s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:58<05:50, 21.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [04:19<05:23, 21.57s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [04:41<05:06, 21.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [05:05<04:50, 22.38s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [05:26<04:22, 21.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:46<03:54, 21.35s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [06:06<03:30, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [06:27<03:08, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:48<02:48, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [07:09<02:26, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [07:30<02:05, 20.88s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:50<01:44, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [08:11<01:23, 20.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [08:32<01:02, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:53<00:41, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [09:14<00:20, 20.96s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [09:19<00:00, 22.40s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:37, 21.57s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:10, 21.32s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:44, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:25<07:32, 21.56s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:48<07:16, 21.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:09<06:54, 21.81s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:32<06:37, 22.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:54<06:14, 22.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:17<05:55, 22.24s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:39<05:35, 22.37s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [04:01<05:11, 22.27s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:24<04:50, 22.34s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:46<04:25, 22.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:07<03:59, 21.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:28<03:36, 21.63s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:50<03:15, 21.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:12<02:56, 22.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:35<02:34, 22.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:56<02:12, 22.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:19<01:50, 22.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:41<01:28, 22.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [08:03<01:06, 22.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:24<00:43, 21.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:46<00:21, 21.87s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:50<00:00, 21.22s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:45, 21.88s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:43<08:18, 21.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:05<07:59, 21.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:26<07:35, 21.67s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:48<07:15, 21.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:10<06:52, 21.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:31<06:28, 21.61s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:53<06:06, 21.58s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:14<05:43, 21.48s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:35<05:21, 21.46s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:57<05:01, 21.52s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:19<04:39, 21.49s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:40<04:17, 21.44s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:01<03:54, 21.31s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:22<03:33, 21.33s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:43<03:11, 21.23s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:04<02:49, 21.22s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:25<02:28, 21.15s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:47<02:08, 21.40s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:10<01:48, 21.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:31<01:26, 21.66s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:53<01:05, 21.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:15<00:43, 21.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:36<00:21, 21.63s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:40<00:00, 20.83s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:37, 21.58s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:43<08:24, 21.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:05<08:02, 21.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:27<07:37, 21.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:49<07:15, 21.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:11<06:56, 21.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:33<06:36, 22.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:55<06:13, 21.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:16<05:49, 21.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:38<05:27, 21.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [04:00<05:04, 21.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:22<04:44, 21.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:44<04:22, 21.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:06<04:00, 21.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:27<03:37, 21.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:49<03:14, 21.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:10<02:53, 21.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:32<02:31, 21.60s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:53<02:09, 21.57s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:15<01:48, 21.67s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:37<01:26, 21.72s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:59<01:05, 21.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:22<00:44, 22.23s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:47<00:22, 22.92s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:51<00:00, 21.28s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:23<09:21, 23.38s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:46<08:55, 23.27s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:11<08:45, 23.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:35<08:23, 23.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:59<07:57, 23.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:22<07:34, 23.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:47<07:13, 24.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [03:11<06:48, 24.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:34<06:18, 23.63s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:57<05:53, 23.59s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [04:19<05:24, 23.15s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:43<05:03, 23.38s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [05:07<04:40, 23.42s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:30<04:18, 23.50s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:53<03:53, 23.38s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [06:16<03:29, 23.24s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:40<03:06, 23.30s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [07:02<02:40, 22.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [07:25<02:17, 22.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:48<01:54, 22.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [08:11<01:31, 22.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [08:34<01:08, 22.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:56<00:45, 22.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [09:18<00:22, 22.68s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [09:23<00:00, 22.52s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:22<08:53, 22.21s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:44<08:27, 22.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:07<08:22, 22.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:32<08:10, 23.36s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:54<07:41, 23.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:17<07:17, 23.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:40<06:53, 22.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [03:02<06:25, 22.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:26<06:07, 22.97s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:49<05:46, 23.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [04:11<05:19, 22.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:34<04:55, 22.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:56<04:30, 22.57s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:19<04:09, 22.66s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:40<03:41, 22.15s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [06:01<03:15, 21.75s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:22<02:52, 21.55s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:42<02:29, 21.30s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [07:03<02:06, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:24<01:44, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:44<01:23, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [08:05<01:02, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:26<00:41, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:47<00:20, 20.89s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:51<00:00, 21.25s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:20, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<07:58, 20.81s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:41, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:23<07:17, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:44<06:57, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:05<06:36, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:26<06:15, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:47<05:58, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:08<05:35, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:29<05:13, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:49<04:52, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:10<04:31, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:31<04:10, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:52<03:48, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:12<03:26, 20.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:33<03:06, 20.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:54<02:46, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:14<02:24, 20.71s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:35<02:04, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:56<01:43, 20.64s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:16<01:22, 20.62s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:37<01:02, 20.72s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:58<00:41, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:19<00:20, 20.75s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:22<00:00, 20.12s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:32, 21.35s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:14, 21.50s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:04<07:52, 21.50s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:25<07:28, 21.37s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:47<07:10, 21.51s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:08<06:46, 21.40s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:29<06:24, 21.35s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:50<06:01, 21.27s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:11<05:38, 21.16s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:33<05:18, 21.23s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:54<04:55, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:15<04:34, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:36<04:14, 21.20s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:57<03:52, 21.10s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:18<03:30, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:39<03:09, 21.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:00<02:48, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:21<02:27, 21.09s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:42<02:06, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:03<01:44, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:24<01:23, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:45<01:02, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:06<00:41, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:27<00:20, 20.98s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:31<00:00, 20.45s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:23, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:04, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:42, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:21, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:00, 21.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:39, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:19, 21.09s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:58, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:36, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:15, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:53, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:12<04:33, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:33<04:12, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:54<03:50, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:15<03:29, 20.97s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:36<03:08, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:57<02:47, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:18<02:26, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:38<02:05, 20.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:59<01:44, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:20<01:23, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:41<01:02, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:02<00:41, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:23<00:21, 21.06s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:27<00:00, 20.32s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:25, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:06, 21.16s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:45, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:21, 21.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:01, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:40, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:19, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:58, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:36, 21.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:15, 21.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:54, 21.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:12<04:34, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:34<04:13, 21.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:54<03:51, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:15<03:30, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:36<03:08, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:57<02:47, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:18<02:26, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:39<02:05, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:00<01:44, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:20<01:23, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:41<01:02, 20.88s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:02<00:41, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:23<00:20, 20.92s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:27<00:00, 20.31s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:26, 21.10s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:05, 21.09s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:43, 21.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:20, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<06:59, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:38, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:17, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:56, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:08<05:34, 20.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:29<05:14, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:50<04:53, 20.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:11<04:32, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:33<04:12, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:53<03:50, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:14<03:29, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:35<03:08, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:56<02:47, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:19<02:30, 21.45s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:41<02:09, 21.59s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:02<01:47, 21.41s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:23<01:25, 21.48s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:46<01:05, 21.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:07<00:42, 21.48s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:28<00:21, 21.37s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:32<00:00, 20.48s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:27, 21.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:02, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:50, 21.40s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:24, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:01, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:07<06:42, 21.18s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:28<06:20, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:57, 21.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:34, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:12, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:51, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:11<04:31, 20.88s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:32<04:10, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:53<03:49, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:14<03:28, 20.82s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:35<03:07, 20.81s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:56<02:47, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:16<02:25, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:37<02:04, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:57<01:43, 20.66s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:18<01:22, 20.64s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:39<01:02, 20.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:59<00:41, 20.67s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:21<00:20, 20.79s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:24<00:00, 20.19s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:23, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<08:00, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:39, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:23<07:15, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:44<06:56, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:05<06:37, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:26<06:16, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:47<05:55, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:07<05:33, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:28<05:12, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:49<04:50, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:10<04:29, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:30<04:08, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:51<03:47, 20.66s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:11<03:26, 20.63s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:32<03:05, 20.61s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:53<02:46, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:14<02:25, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:35<02:04, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:56<01:44, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:18<01:24, 21.16s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:39<01:03, 21.21s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:59<00:42, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:21<00:21, 21.05s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:25<00:00, 20.21s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:32, 21.37s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:01, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:44, 21.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:22, 21.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:00, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:39, 21.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:17, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:58, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:34, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:29<05:13, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:54, 21.01s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:12<04:35, 21.16s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:33<04:13, 21.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:54<03:51, 21.05s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:15<03:30, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:36<03:10, 21.13s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:58<02:50, 21.26s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:19<02:28, 21.19s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:40<02:06, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:01<01:45, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:22<01:24, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:43<01:03, 21.00s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:04<00:42, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:26<00:21, 21.38s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:30<00:00, 20.43s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:32, 21.36s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:05, 21.11s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:48, 21.27s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:23, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:46<07:08, 21.43s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:08<06:50, 21.63s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:29<06:27, 21.53s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:51<06:04, 21.43s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:12<05:42, 21.39s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:33<05:20, 21.39s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:56<05:02, 21.62s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:17<04:41, 21.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:39<04:18, 21.56s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [05:00<03:55, 21.42s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:21<03:33, 21.34s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:42<03:10, 21.20s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [06:03<02:49, 21.22s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:24<02:27, 21.10s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:44<02:05, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:05<01:44, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:26<01:23, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:47<01:02, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:07<00:41, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:29<00:20, 20.87s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:32<00:00, 20.51s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:31, 21.30s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:07, 21.21s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:46, 21.21s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:25, 21.19s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:00, 21.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:41, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:19, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:57, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:35, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:14, 20.94s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:52, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:12<04:31, 20.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:33<04:10, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:53<03:49, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:14<03:28, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:35<03:06, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:56<02:46, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:17<02:25, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:37<02:04, 20.82s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:58<01:43, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:19<01:23, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:40<01:02, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:01<00:42, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:23<00:21, 21.14s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:27<00:00, 20.29s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:20, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<08:01, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:39, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:23<07:17, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:44<06:57, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:05<06:36, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:26<06:16, 20.89s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:47<05:55, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:07<05:33, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:28<05:13, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:49<04:51, 20.81s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:10<04:31, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:31<04:10, 20.90s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:51<03:48, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:12<03:27, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:33<03:06, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:54<02:47, 20.91s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:16<02:27, 21.07s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:36<02:05, 20.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:57<01:44, 20.88s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:18<01:23, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:39<01:02, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:00<00:41, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:21<00:20, 20.96s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:25<00:00, 20.21s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:25, 21.06s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:03, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:43, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:20, 20.98s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:45<07:04, 21.22s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:06<06:41, 21.15s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:20, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:49<05:59, 21.17s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:37, 21.09s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:15, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:51<04:54, 21.04s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:13<04:34, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:34<04:12, 21.08s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:54<03:50, 20.99s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:16<03:31, 21.12s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:37<03:10, 21.11s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:59<02:50, 21.31s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:20<02:28, 21.19s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:40<02:06, 21.02s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [07:01<01:44, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:22<01:23, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:43<01:02, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [08:04<00:41, 20.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:25<00:20, 21.00s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:29<00:00, 20.37s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:21<08:34, 21.44s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:42<08:08, 21.25s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:03<07:47, 21.27s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:24<07:25, 21.22s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:46<07:02, 21.15s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:07<06:41, 21.14s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:27<06:18, 21.03s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:48<05:56, 20.95s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:09<05:33, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:30<05:12, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:50<04:50, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:11<04:30, 20.84s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:32<04:10, 20.87s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:53<03:48, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:14<03:28, 20.82s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:34<03:06, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:55<02:45, 20.75s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:16<02:24, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:36<02:03, 20.61s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:56<01:42, 20.54s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:17<01:22, 20.53s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:37<01:01, 20.54s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:58<00:41, 20.57s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:19<00:20, 20.62s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:23<00:00, 20.13s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:18, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<07:57, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:37, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:23<07:15, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:43<06:55, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:04<06:35, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:25<06:14, 20.78s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:46<05:53, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:06<05:32, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:27<05:12, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:48<04:51, 20.82s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:09<04:30, 20.83s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:30<04:10, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:50<03:48, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:11<03:26, 20.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:32<03:06, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:53<02:46, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:13<02:24, 20.71s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:34<02:04, 20.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:54<01:43, 20.62s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:15<01:22, 20.61s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:36<01:01, 20.63s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:56<00:41, 20.60s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:17<00:20, 20.61s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:21<00:00, 20.04s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:21, 20.92s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<07:56, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:36, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:22<07:14, 20.71s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:43<06:53, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:04<06:34, 20.75s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:25<06:13, 20.75s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:45<05:52, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:06<05:31, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:27<05:10, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:48<04:49, 20.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:09<04:30, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:29<04:09, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:50<03:48, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:11<03:26, 20.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:31<03:05, 20.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:52<02:45, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:13<02:24, 20.68s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:33<02:03, 20.64s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:54<01:43, 20.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:14<01:22, 20.65s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:35<01:01, 20.67s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:56<00:41, 20.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:17<00:20, 20.69s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:20<00:00, 20.03s/it]


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Corpus Chunks:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   4%|▍         | 1/25 [00:20<08:16, 20.71s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:   8%|▊         | 2/25 [00:41<07:57, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  12%|█▏        | 3/25 [01:02<07:35, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  16%|█▌        | 4/25 [01:22<07:13, 20.66s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  20%|██        | 5/25 [01:43<06:54, 20.70s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  24%|██▍       | 6/25 [02:04<06:33, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  28%|██▊       | 7/25 [02:25<06:13, 20.77s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  32%|███▏      | 8/25 [02:45<05:52, 20.74s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  36%|███▌      | 9/25 [03:06<05:30, 20.67s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  40%|████      | 10/25 [03:27<05:10, 20.73s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  44%|████▍     | 11/25 [03:47<04:49, 20.69s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  48%|████▊     | 12/25 [04:08<04:30, 20.80s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  52%|█████▏    | 13/25 [04:29<04:10, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  56%|█████▌    | 14/25 [04:50<03:48, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  60%|██████    | 15/25 [05:11<03:27, 20.72s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  64%|██████▍   | 16/25 [05:31<03:06, 20.76s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  68%|██████▊   | 17/25 [05:53<02:47, 20.96s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  72%|███████▏  | 18/25 [06:13<02:25, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  76%|███████▌  | 19/25 [06:34<02:04, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  80%|████████  | 20/25 [06:55<01:43, 20.79s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  84%|████████▍ | 21/25 [07:16<01:23, 20.86s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  88%|████████▊ | 22/25 [07:37<01:02, 20.93s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  92%|█████████▏| 23/25 [07:58<00:41, 20.85s/it]

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

Corpus Chunks:  96%|█████████▌| 24/25 [08:19<00:20, 20.87s/it]

Batches:   0%|          | 0/276 [00:00<?, ?it/s]

Corpus Chunks: 100%|██████████| 25/25 [08:22<00:00, 20.11s/it]


In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re
from typing import List, Tuple, Dict
import json
from tqdm import tqdm

class LLaMAFilter:
    def __init__(self, model_name="meta-llama/Llama-3.1-8B-Instruct"):
        self.device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
        
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            padding_side="left",
            truncation_side="left"
        )
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        
        self.rating_template = (
            "[INST] Rate claim-evidence relevance from 1-5 (1=irrelevant, 5=perfect). "
            "ONLY respond with the number.\n\n"
            "Claim: {claim}\nEvidence: {evidence}\nRating: [/INST]"
        )

    def filter_evidence(self, claim: str, evidence_candidates: List[Tuple[str, str]], final_k: int = 5) -> List[Tuple[str, str, int]]:
        """Optimized evidence filtering with batch processing"""
        # Batch prompts
        prompts = [
            self.rating_template.format(claim=claim, evidence=text)
            for eid, text in evidence_candidates
        ]
        
        # Batch tokenize
        inputs = self.tokenizer(
            prompts,
            return_tensors="pt",
            padding=True,
            truncation=True
        ).to(self.device)
        
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=2, 
                temperature=0.1,  
                pad_token_id=self.tokenizer.eos_token_id,
                use_cache=True
            )
        
        scored_evidence = []
        decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        
        for (eid, text), output in zip(evidence_candidates, decoded):
            rating = 1  # Default if parsing fails
            if match := re.search(r'\d', output.split("Rating:")[-1]):
                rating = min(5, max(1, int(match.group())))
                
            scored_evidence.append((eid, text, rating))
        
        # Return top-k
        return sorted(scored_evidence, key=lambda x: -x[2])[:final_k]

In [7]:
def get_evidence_text(evidences):
    return [(eid, evidenceRetriever.evidence_map[eid]) for eid in evidences]

In [8]:
def initial_retrieval(retriever, test_claims_path):
    with open(test_claims_path) as f:
        test_claims = json.load(f)
    
    output = {}
    for claim_id, claim_data in test_claims.items():
        # Retrieve candidates
        candidates = retriever.retrieve(claim_data['claim_text'], top_k=10)
        retrieved = [eid for eid, _  in candidates[:10]]
        output[claim_id] = {
            "initial_evidences": retrieved,
            "claim_text": claim_data['claim_text']
        }

    # store output
    with open('data/initial_retrieval_output.json', 'w') as f:
        json.dump(output, f, indent=2)
    return output


In [9]:
res = initial_retrieval(evidenceRetriever, test_claims_path='data/dev-claims-baseline.json')

In [10]:
llama_filter = LLaMAFilter()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [12]:
def process_claims(llama_filterer, dev_claims_set):
    for claim_id, claim_data in dev_claims_set.items():
        candidates = claim_data['initial_evidences']
        filtered = llama_filterer.filter_evidence(claim_data['claim_text'], get_evidence_text(candidates), final_k=6)
        print(filtered)
        dev_claims_set[claim_id]['filtered_evidences'] = [eid for eid, _, _ in filtered]
        # store output
    with open('data/filtered_retrieval_output.json', 'w') as f:
        json.dump(dev_claims_set, f, indent=2)
    return dev_claims_set

res = process_claims(llama_filter,res)
res

[('evidence-572512', '"South Australia has the highest power prices in the world".', 5), ('evidence-67732', '[citation needed] South Australia has the highest retail price for electricity in the country.', 1), ('evidence-780332', 'Industrialised countries such as Canada, the US, and Australia are among the highest per capita consumers of electricity in the world, which is possible thanks to a widespread electrical distribution network.', 1), ('evidence-452156', 'Energy in Australia is the production in Australia of energy and electricity, for consumption or export.', 1), ('evidence-48256', 'One of the most powerful power grids in the world supplies power to the state of Queensland, Australia.', 1), ('evidence-808896', 'The class are the first electric trains to operate in South Australia.', 1)]
[('evidence-559290', "Australia’s total greenhouse gas emissions increased by 0.3% in the first six months of the Carbon Tax to December 2012 to 276.5 Mt CO2 equiv, while Australia's gross domes

{'claim-752': {'initial_evidences': ['evidence-67732',
   'evidence-572512',
   'evidence-780332',
   'evidence-452156',
   'evidence-48256',
   'evidence-808896',
   'evidence-1061888',
   'evidence-252686',
   'evidence-723533',
   'evidence-169170'],
  'claim_text': '[South Australia] has the most expensive electricity in the world.',
  'filtered_evidences': ['evidence-572512',
   'evidence-67732',
   'evidence-780332',
   'evidence-452156',
   'evidence-48256',
   'evidence-808896']},
 'claim-375': {'initial_evidences': ['evidence-334443',
   'evidence-357540',
   'evidence-724468',
   'evidence-316768',
   'evidence-342344',
   'evidence-646443',
   'evidence-553922',
   'evidence-559290',
   'evidence-281950',
   'evidence-441151'],
  'claim_text': 'when 3 per cent of total annual global emissions of carbon dioxide are from humans and Australia prod\xaduces 1.3 per cent of this 3 per cent, then no amount of emissions reductio\xadn here will have any effect on global climate.',
  