In [None]:
!pip install datasets
from datasets import Dataset, DatasetDict, load_dataset, IterableDataset
import re
import torch
import torch.nn as nn
import pandas as pd
from transformers import TrainingArguments, Trainer
from transformers import RobertaPreTrainedModel, RobertaModel, RobertaTokenizer
from transformers import DataCollatorWithPadding
from transformers import AutoConfig
import glob
import os
import pyarrow.parquet as pq
from transformers.modeling_outputs import SequenceClassifierOutput
from google.colab import drive

import torch.nn.functional as F

from transformers.modeling_outputs import ModelOutput
from dataclasses import dataclass


drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Function preprocessors from CloneDetectionPipeline
class FunctionPreprocessor:
    def get_function(self, code):
        results = []
        fn_list = re.findall(r"\ndef [a-zA-Z0-9_]+\(", code)
        for fn in fn_list:
            results.append(fn[4:-1].strip())
        return results

    def determine_function(self, code, function_name):
        num = len(re.findall(r"[^a-zA-Z]" + function_name + r"[^a-zA-Z]", code))
        return False if num <= 1 else True

    def delete_function(self, code, name):
        start_id, _ = re.search("def " + name, code).span()
        ptr = start_id
        while ptr < len(code) - 1:
            if code[ptr] == "\n" and re.search("[a-zA-Z]", code[ptr + 1]) is not None:
                break
            ptr += 1
        if ptr != len(code) - 1:
            end_id = ptr
            code = code[:start_id] + code[end_id:]
        return code

    def preprocess(self, code):
        code = "\n" + code
        fn_list = self.get_function(code)
        if len(fn_list) == 0:
            return code
        for fn in fn_list:
            flag = self.determine_function(code, fn)
            if not flag:
                code = self.delete_function(code, fn)
        return code

class AnnotationPreprocessor:
    def delete_annotation(self, code):
        sens = code.split("\n")
        sens_processed = [sen.split("#")[0] for sen in sens]  # Remove inline comments
        return "\n".join(sens_processed)

    def delete_import(self, code):
        sens = code.split("\n")
        sens_processed = [sen for sen in sens if "import" not in sen]  # Remove import statements
        return "\n".join(sens_processed)

    def preprocess(self, code):
        code = self.delete_annotation(code)
        code = self.delete_import(code)
        code = re.sub(r"\s+", " ", code).strip()  # Remove excessive whitespace
        return code

In [None]:
class HuggingFaceCloneDetectionDataset:
    """Custom Iterable Dataset for Hugging Face Trainer"""

    def __init__(self, parquet_loader, function_preprocessor, annotation_preprocessor, tokenizer, max_length=256):
        self.loader = parquet_loader
        self.function_preprocessor = function_preprocessor
        self.annotation_preprocessor = annotation_preprocessor
        self.tokenizer = tokenizer
        self.max_length = max_length

    def preprocess_code(self, code):
        """Apply function & annotation preprocessing"""
        return self.annotation_preprocessor.preprocess(
            self.function_preprocessor.preprocess(code)
        )

    def __iter__(self):
        """Yields dynamically processed & tokenized samples"""
        for batch_df in self.loader:
            for _, row in batch_df.iterrows():
                yield self.process_row(row)

    def process_row(self, row):
        """Preprocesses and tokenizes the two input sequences"""
        code1 = self.preprocess_code(row["code1"])
        code2 = self.preprocess_code(row["code2"])
        label = row["similar"]

        tokens1 = self.tokenizer(code1, max_length=self.max_length, truncation=True, padding="max_length")
        tokens2 = self.tokenizer(code2, max_length=self.max_length, truncation=True, padding="max_length")

        return {
            "input_ids1": tokens1["input_ids"],
            "attention_mask1": tokens1["attention_mask"],
            "input_ids2": tokens2["input_ids"],
            "attention_mask2": tokens2["attention_mask"],
            "labels": label,
        }


class LocalParquetBatchLoader:
    """Memory-efficient batch loader for local Parquet files"""
    def __init__(self, file_paths, batch_size=16):
        self.file_paths = file_paths
        self.batch_size = batch_size
        self.current_file_idx = 0
        self.current_batch_idx = 0
        self.table = None
        self.num_rows = 0

    def load_next_file(self):
        """Loads the next Parquet file into memory in an optimized way"""
        if self.current_file_idx >= len(self.file_paths):
            return None  # No more files

        file_path = self.file_paths[self.current_file_idx]
        print(f"📥 Loading {file_path} from local storage...")

        self.table = pq.read_table(file_path, columns=["code1", "code2", "similar"])
        self.num_rows = self.table.num_rows
        self.current_batch_idx = 0
        self.current_file_idx += 1

    def __iter__(self):
        """Iterates through all files and dynamically loads batches"""
        while self.current_file_idx < len(self.file_paths):
            self.load_next_file()
            if self.table is None:
                break

            while self.current_batch_idx < self.num_rows:
                start = self.current_batch_idx
                end = min(start + self.batch_size, self.num_rows)
                batch = self.table.slice(start, end - start).to_pandas()
                self.current_batch_idx = end
                yield batch




In [None]:
from itertools import islice

DATASET_PATH = "/content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data"

train_files = sorted(glob.glob(os.path.join(DATASET_PATH, "train-*.parquet")))
val_files = sorted(glob.glob(os.path.join(DATASET_PATH, "val-*.parquet")))

# ✅ Initialize preprocessors and tokenizer
function_preprocessor = FunctionPreprocessor()
annotation_preprocessor = AnnotationPreprocessor()
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# ✅ Use LocalParquetBatchLoader instead of GCS loader
train_loader = LocalParquetBatchLoader(train_files, batch_size=128)
val_loader = LocalParquetBatchLoader(val_files, batch_size=128)

train_dataset = IterableDataset.from_generator(lambda: HuggingFaceCloneDetectionDataset(train_loader, function_preprocessor, annotation_preprocessor, tokenizer))
# ✅ Limit val_loader to 5000 samples
LIMIT_EVAL_SAMPLES = 2000

# ✅ Wrap the val_loader to stop after 5000 samples
def limited_val_loader():
    return islice(val_loader, LIMIT_EVAL_SAMPLES)  # Stops after 5000 samples

# ✅ Create a new `val_dataset` with the limited subset
val_dataset = IterableDataset.from_generator(lambda: HuggingFaceCloneDetectionDataset(
    limited_val_loader(), function_preprocessor, annotation_preprocessor, tokenizer
))
# ✅ Check if the dataset loads correctly
for batch in train_dataset:
    print(batch)  # ✅ Ensure it prints one batch
    break



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00000-of-00009.parquet from local storage...
{'input_ids1': [0, 282, 6, 475, 6, 784, 5457, 5456, 1640, 2544, 6, 8135, 49123, 44154, 49338, 10, 5457, 646, 8458, 1640, 32557, 1640, 2544, 6, 8135, 49123, 44154, 43048, 35122, 13, 18134, 11, 1186, 1640, 282, 46077, 741, 5457, 646, 8458, 1640, 32557, 1640, 2544, 6, 8135, 49123, 44154, 43048, 35122, 13, 18134, 11, 1186, 1640, 119, 46077, 740, 5457, 48395, 288, 13, 939, 11, 1186, 1640, 462, 46077, 13, 1236, 11, 1186, 1640, 282, 46077, 13, 939, 11, 1186, 1640, 282, 3256, 13, 1236, 11, 1186, 1640, 462, 3256, 13, 449, 11, 1186, 1640, 119, 3256, 740, 10975, 118, 46386, 267, 742, 49371, 10, 10975, 118, 46386, 330, 742, 1009, 741, 10975, 330, 46386, 267, 742, 13, 939, 11, 1186, 1640, 282, 3256, 5780, 46469, 43809, 26960, 1640, 32557, 1640, 6031, 6, 740, 10975, 118, 742, 47619, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [None]:
def siamese_data_collator(features):
    """Custom collator to handle Siamese network inputs"""
    batch = {
        "input_ids1": torch.tensor([f["input_ids1"] for f in features], dtype=torch.long),
        "attention_mask1": torch.tensor([f["attention_mask1"] for f in features], dtype=torch.long),
        "input_ids2": torch.tensor([f["input_ids2"] for f in features], dtype=torch.long),
        "attention_mask2": torch.tensor([f["attention_mask2"] for f in features], dtype=torch.long),
        "labels": torch.tensor([f["labels"] for f in features], dtype=torch.float32),
    }
    return batch

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaModel, RobertaPreTrainedModel
from transformers.modeling_outputs import ModelOutput
from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class SiameseOutput(ModelOutput):
    """Custom Hugging Face-compatible output format for similarity models"""
    loss: Optional[torch.FloatTensor] = None
    logits: Optional[torch.FloatTensor] = None
    similarity_score: torch.FloatTensor = None  # ✅ Explicitly named similarity score
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None

class SiameseModel(RobertaPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.training_step = 0  # ✅ Keep track of training steps

        # ✅ Load Pretrained RoBERTa
        self.roberta = RobertaModel(config, add_pooling_layer=False)

        # ✅ Projection Layer to Reduce Embedding Dimensionality
        self.net = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.ReLU(),
            nn.Linear(config.hidden_size, config.hidden_size),  # ✅ Project to a smaller feature space
        )

        # # ✅ Loss Functions
        # self.loss_fn_bce = nn.BCEWithLogitsLoss()
        self.loss_fn_cosine = nn.CosineEmbeddingLoss(margin = 0.2)
        self.loss_fn_bce = nn.BCEWithLogitsLoss()

    def last_4_layer_avg(self, model_output):
        layer_indices = [-1, -2, -3, -4]  # Last 4 layers
        stacked_layers = torch.stack([model_output.hidden_states[i] for i in layer_indices])  # (4, batch, seq_len, hidden_dim)
        return stacked_layers.mean(dim=0)[:, 0, :]  # Take mean and use CLS

    def forward(
        self,
        input_ids1=None,
        attention_mask1=None,
        input_ids2=None,
        attention_mask2=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # ✅ Forward pass through RoBERTa
        outputs1 = self.roberta(input_ids1, attention_mask=attention_mask1, return_dict=return_dict)
        outputs2 = self.roberta(input_ids2, attention_mask=attention_mask2, return_dict=return_dict)

        emb1 = self.last_4_layer_avg(outputs1)
        emb2 = self.last_4_layer_avg(outputs2)

        # ✅ Pass through Projection Layer
        emb1 = self.net(emb1)
        emb2 = self.net(emb2)

        # ✅ Normalize embeddings for cosine similarity
        emb1 = F.normalize(emb1, p=2, dim=-1)
        emb2 = F.normalize(emb2, p=2, dim=-1)

        # ✅ Compute Cosine Similarity (Raw Logits)
        cosine_sim = F.cosine_similarity(emb1, emb2, dim=-1).unsqueeze(1)  # Shape: (batch_size, 1)
        logit_scale = 8  # Moderate scale
        margin = 0.2  # Push non-clones further down
        logits = logit_scale * (cosine_sim - margin)
        logits = logits.squeeze(1)


        loss = None
        if labels is not None:
            loss = self.loss_fn_bce(logits, labels.float())  # ✅ Use BCE loss with logits

        return SiameseOutput(
            loss=loss,
            logits=logits,  # ✅ Return logits
            similarity_score=torch.sigmoid(logits),  # ✅ Compute probability from logits
            hidden_states=outputs1.hidden_states if return_dict else None,
            attentions=outputs1.attentions if return_dict else None,
        )





In [None]:
from transformers import TrainerCallback
import torch

class ClassificationErrorCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        """Computes classification error in real-time on training batches."""
        trainer = kwargs.get("trainer", None)
        if trainer is None or trainer.model is None:
            return

        # Get last training batch
        last_batch = trainer.get_train_dataloader().dataset[-args.per_device_train_batch_size:]
        device = trainer.model.device
        model = trainer.model

        # Move batch to device
        batch = {k: torch.tensor(v).to(device) for k, v in last_batch.items()}

        # Run forward pass
        model.eval()
        with torch.no_grad():
            outputs = model(**batch)
            logits = outputs.logits  # ✅ Get logits from SiameseOutput
            labels = batch["labels"]  # ✅ Get ground truth labels

        # Convert logits to probabilities
        probs = torch.sigmoid(logits)

        # Apply threshold (0.5) for classification
        predictions = (probs > 0.5).long()

        # Compute classification error
        errors = (predictions != labels).float().mean().item()

        # Log the classification error
        print(f"🔹 Step {state.global_step}: Classification Error = {errors:.4f}")
        trainer.log({"classification_error": errors})

from sklearn.metrics import accuracy_score
import numpy as np

def compute_metrics(eval_pred):
    """Compute classification error instead of loss."""
    logits, labels = eval_pred  # ✅ Unpack logits and labels

    # ✅ Ensure labels are NumPy arrays
    labels = np.array(labels)

    # ✅ Convert logits to probabilities using sigmoid
    probs = torch.sigmoid(torch.tensor(logits)).numpy()

    # ✅ Convert probabilities to binary predictions (threshold = 0.5)
    preds = (probs > 0.5).astype(int)

    # ✅ Compute classification error (1 - accuracy)
    accuracy = accuracy_score(labels, preds)
    classification_error = 1 - accuracy

    return {"classification_error": classification_error}



In [None]:
# import json
# from safetensors.torch import load_file
# import torch
# from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
# from transformers import get_linear_schedule_with_warmup


# config = AutoConfig.from_pretrained("roberta-base")
# model = SiameseModel(config)
# model.to("cuda" if torch.cuda.is_available() else "cpu")  # Move to GPU if available

# # ✅ Define warm-up steps (first 1000 batches)
# num_warmup_steps = 600  # Gradually increase LR over 600 batches

# # ✅ 5. Load tokenizer
# tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# # ✅ 6. Set up TrainingArguments
# training_args = TrainingArguments(
#     output_dir="/content/drive/MyDrive/new_siamese_model",
#     logging_dir="/content/drive/MyDrive/new_logs",
#     save_strategy="steps",
#     logging_strategy="steps",
#     logging_steps=200,  # ✅ Logs every 200 steps
#     save_steps=1000,
#     per_device_train_batch_size=64,
#     per_device_eval_batch_size=364,
#     num_train_epochs=2,
#     max_steps=320000,
#     load_best_model_at_end=False,
#     report_to="none",
#     logging_first_step=True
# )

# # ✅ 7. Reset optimizer (IGNORE previous optimizer state)
# optimizer = torch.optim.AdamW([
#     {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.01},  # RoBERTa layers
#     {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.01},  # Custom layers
# ], eps=1e-8)
# # ✅ Learning rate scheduler with warm-up

# lr_scheduler = get_linear_schedule_with_warmup(
#     optimizer,
#     num_warmup_steps=num_warmup_steps,
#     num_training_steps=320000
# )


# # ✅ 9. Initialize Trainer (Manually set optimizer)
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=val_dataset,
#     data_collator=siamese_data_collator,
#     optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
# )


# # ✅ 11. Resume training from the latest model weights, but with a new optimizer
# trainer.train(resume_from_checkpoint=False)  # 🚀 Ignore optimizer mismatch






📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.8453
200,0.7071
400,0.6303
600,0.6024
800,0.6036
1000,0.5862
1200,0.5779
1400,0.5796


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup
# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-1000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ 4. Move model to GPU
model.to(device)
# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 600  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    max_steps=320000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.01},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.01},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=320000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train(resume_from_checkpoint=False)  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6019
200,0.5752
400,0.5725
600,0.5666
800,0.5729
1000,0.578
1200,0.5774
1400,0.568
1600,0.5652
1800,0.5624


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup
# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-2000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ 4. Move model to GPU
model.to(device)
# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 600  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=128,  # ✅ Using larger batch size
    per_device_eval_batch_size=128,
    num_train_epochs=2,
    max_steps=320000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.01},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.01},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=320000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train(resume_from_checkpoint=False)  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6292
200,0.5494
400,0.542
600,0.5508
800,0.549
1000,0.5449
1200,0.5453
1400,0.5453
1600,0.5429
1800,0.5443


📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00002-of-00009.parquet from local storage...
📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00003-of-00009.parquet from local storage...


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup
# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-11000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ 4. Move model to GPU
model.to(device)
# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=128,  # ✅ Using larger batch size
    per_device_eval_batch_size=128,
    num_train_epochs=2,
    max_steps=320000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 1e-3, "weight_decay": 0.0001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=30000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train(resume_from_checkpoint=False)  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00003-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.5169
200,0.5226
400,0.5271
600,0.5248
800,0.5237
1000,0.525
1200,0.5196
1400,0.5244
1600,0.5209
1800,0.5195


📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00004-of-00009.parquet from local storage...


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup


config = AutoConfig.from_pretrained("roberta-base")
model = SiameseModel(config)
model.to("cuda" if torch.cuda.is_available() else "cpu")  # Move to GPU if available

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 1000  # Gradually increase LR over 600 batches

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=2000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 1e-3, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=50000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train()  # 🚀 Ignore optimizer mismatch



📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,1.8275
200,0.7824
400,0.6542
600,0.6072
800,0.5854
1000,0.538
1200,0.53
1400,0.4973


Step,Training Loss
1,1.8275
200,0.7824
400,0.6542
600,0.6072
800,0.5854
1000,0.538
1200,0.53
1400,0.4973
1600,0.4658
1800,0.4444


In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup
# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-2000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ 4. Move model to GPU
model.to(device)
# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    max_steps=320000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train(resume_from_checkpoint=False)  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.7645
200,0.5277
400,0.4858
600,0.4441
800,0.4292
1000,0.4362
1200,0.4441
1400,0.4271
1600,0.4131
1800,0.4306


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-7000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

config = AutoConfig.from_pretrained("roberta-base", output_hidden_states=True)

model = SiameseModel(config)
model.to("cuda" if torch.cuda.is_available() else "cpu")  # Move to GPU if available

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    max_steps=320000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train()  # 🚀 Ignore optimizer mismatch



📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,1.839
200,0.7338
400,0.5812
600,0.5043
800,0.453
1000,0.4394
1200,0.4241
1400,0.4117
1600,0.3962
1800,0.3775


In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-3000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler)  # 🚀 Reset optimizer state, Ignore previous optimizer
)


# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train()  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.4123
200,0.2705
400,0.2821
600,0.2735
800,0.2736
1000,0.2813
1200,0.2806
1400,0.2766
1600,0.2716
1800,0.2813


KeyboardInterrupt: 

In [None]:
import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-3000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,
    save_strategy="steps",
    logging_strategy="steps",
    evaluation_strategy="steps",  # ✅ Evaluate every few steps
    eval_steps=1000,  # ✅ Log classification error every 1000 steps
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)



# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # ✅ Now only evaluates on 5000 samples!
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler),
    compute_metrics=compute_metrics,  # ✅ Use classification error instead of loss!
)



# ✅ 11. Resume training from the latest model weights, but with a new optimizer
trainer.train()  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00002-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.3729
200,0.2266
400,0.2387
600,0.226
800,0.2259
1000,0.2154
1200,0.2189
1400,0.2132
1600,0.2097
1800,0.2052


KeyboardInterrupt: 

In [None]:

import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-3000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=2,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 2e-4, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # ✅ Now only evaluates on 1000 samples!
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler),
)


trainer.train()  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.3226
200,0.19
400,0.1965
600,0.1926
800,0.1865
1000,0.1939
1200,0.194
1400,0.189
1600,0.1846
1800,0.1957


KeyboardInterrupt: 

In [None]:

import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-3000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=64,  # ✅ Using larger batch size
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=2,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 5e-5, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # ✅ Now only evaluates on 1000 samples!
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler),
)


trainer.train()  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00002-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.2355
200,0.1803
400,0.1872
600,0.1751
800,0.1715
1000,0.171
1200,0.1746
1400,0.1695
1600,0.1686
1800,0.1638


📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00003-of-00009.parquet from local storage...
📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00004-of-00009.parquet from local storage...


KeyboardInterrupt: 

In [None]:

import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-16000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=128,  # ✅ Using larger batch size
    per_device_eval_batch_size=128,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 5e-5, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # ✅ Now only evaluates on 1000 samples!
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler),
)


trainer.train()  # 🚀 Ignore optimizer mismatch

🔹 Missing keys: []
🔹 Unexpected keys: []




📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00001-of-00009.parquet from local storage...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.1456
200,0.1106
400,0.1106
600,0.1136
800,0.1106
1000,0.1164
1200,0.1099
1400,0.1111
1600,0.1086
1800,0.1122


📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00002-of-00009.parquet from local storage...
📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00003-of-00009.parquet from local storage...
📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00004-of-00009.parquet from local storage...
📥 Loading /content/drive/My Drive/4YPdataset/1-fold-clone-detection-600k-5fold/data/train-00005-of-00009.parquet from local storage...


KeyboardInterrupt: 

In [None]:

import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup


import torch
import pandas as pd
import pyarrow.parquet as pq
from tqdm import tqdm
from transformers import AutoTokenizer

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-20000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded
model.to(device)
model.eval()  # Put model in evaluation mode

# ✅ Set up dataset loader
file_paths = ["//content/drive/MyDrive/4YPdataset/1-fold-clone-detection-600k-5fold/data/val-00000-of-00003.parquet"]  # Replace with actual Parquet file paths
batch_loader = LocalParquetBatchLoader(file_paths, batch_size=64)
# ✅ Initialize preprocessors and tokenizer
function_preprocessor = FunctionPreprocessor()
annotation_preprocessor = AnnotationPreprocessor()
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
# ✅ Track performance metrics
total_samples = 14400
processed_samples = 0
total_correct = 0
total_batches = 0

# ✅ Process dataset in batches
with torch.no_grad():  # Disable gradient calculation for faster inference
    for batch in tqdm(batch_loader, desc="Evaluating model"):
        if processed_samples >= total_samples:
            break  # Stop after 3200 samples

        # ✅ Convert DataFrame to input dictionary
        input_data = HuggingFaceCloneDetectionDataset(None, function_preprocessor, annotation_preprocessor, tokenizer)
        batch_dicts = [input_data.process_row(row) for _, row in batch.iterrows()]

        # ✅ Convert to PyTorch tensors & move everything to `device`
        batch_inputs = {
            "input_ids1": torch.tensor([d["input_ids1"] for d in batch_dicts], dtype=torch.long).to(device),
            "attention_mask1": torch.tensor([d["attention_mask1"] for d in batch_dicts], dtype=torch.long).to(device),
            "input_ids2": torch.tensor([d["input_ids2"] for d in batch_dicts], dtype=torch.long).to(device),
            "attention_mask2": torch.tensor([d["attention_mask2"] for d in batch_dicts], dtype=torch.long).to(device),
        }
        labels = torch.tensor([d["labels"] for d in batch_dicts], dtype=torch.float).to(device)  # ✅ Move to `device`

        # ✅ Run model inference
        outputs = model(**batch_inputs)
        similarity_scores = outputs.similarity_score.cpu().numpy()  # Sigmoid output
        logits = outputs.logits.cpu().numpy()  # Raw logit values
        preds = (similarity_scores >= 0.5).astype(int)  # Convert to binary labels

        # ✅ Compute batch classification error
        batch_correct = (preds == labels.cpu().numpy()).sum()
        batch_total = labels.shape[0]
        batch_error = 1 - (batch_correct / batch_total)

        # ✅ Update overall metrics
        total_correct += batch_correct
        processed_samples += batch_total
        total_batches += 1

        # ✅ Print batch results
        print(f"\n🔹 **Batch {total_batches}:**")
        print(f"   ✅ Avg Similarity Score: {similarity_scores.mean():.4f}")
        print(f"   ✅ Max Logits: {logits.max():.4f}, Min Logits: {logits.min():.4f}")
        print(f"   ❌ Batch Classification Error: {batch_error:.4%}")

# ✅ Final Evaluation Summary
overall_error = 1 - (total_correct / processed_samples)
print("\n🔹 **Final Model Evaluation:**")
print(f"   🔹 Total Samples Processed: {processed_samples}")
print(f"   🔹 Total Batches: {total_batches}")
print(f"   ✅ Overall Classification Error: {overall_error:.4%}")


🔹 Missing keys: []
🔹 Unexpected keys: []


Evaluating model: 0it [00:00, ?it/s]

📥 Loading //content/drive/MyDrive/4YPdataset/1-fold-clone-detection-600k-5fold/data/val-00000-of-00003.parquet from local storage...


Evaluating model: 1it [00:00,  1.35it/s]


🔹 **Batch 1:**
   ✅ Avg Similarity Score: 0.4616
   ✅ Max Logits: 6.4000, Min Logits: -2.0648
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 2it [00:01,  1.71it/s]


🔹 **Batch 2:**
   ✅ Avg Similarity Score: 0.5220
   ✅ Max Logits: 6.4000, Min Logits: -2.2456
   ❌ Batch Classification Error: 6.2500%


Evaluating model: 3it [00:01,  1.83it/s]


🔹 **Batch 3:**
   ✅ Avg Similarity Score: 0.3794
   ✅ Max Logits: 6.3344, Min Logits: -2.2899
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 4it [00:02,  1.91it/s]


🔹 **Batch 4:**
   ✅ Avg Similarity Score: 0.4626
   ✅ Max Logits: 6.4000, Min Logits: -2.5436
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 5it [00:02,  1.99it/s]


🔹 **Batch 5:**
   ✅ Avg Similarity Score: 0.4352
   ✅ Max Logits: 6.3993, Min Logits: -2.3420
   ❌ Batch Classification Error: 7.8125%


Evaluating model: 6it [00:03,  2.05it/s]


🔹 **Batch 6:**
   ✅ Avg Similarity Score: 0.4922
   ✅ Max Logits: 6.3891, Min Logits: -2.9697
   ❌ Batch Classification Error: 9.3750%


Evaluating model: 7it [00:03,  2.05it/s]


🔹 **Batch 7:**
   ✅ Avg Similarity Score: 0.4317
   ✅ Max Logits: 6.3248, Min Logits: -2.3706
   ❌ Batch Classification Error: 29.6875%


Evaluating model: 8it [00:04,  2.06it/s]


🔹 **Batch 8:**
   ✅ Avg Similarity Score: 0.4178
   ✅ Max Logits: 6.3999, Min Logits: -2.5784
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 9it [00:04,  2.11it/s]


🔹 **Batch 9:**
   ✅ Avg Similarity Score: 0.4658
   ✅ Max Logits: 6.3999, Min Logits: -2.3524
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 10it [00:05,  2.10it/s]


🔹 **Batch 10:**
   ✅ Avg Similarity Score: 0.4443
   ✅ Max Logits: 6.4000, Min Logits: -2.4525
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 11it [00:05,  2.10it/s]


🔹 **Batch 11:**
   ✅ Avg Similarity Score: 0.4535
   ✅ Max Logits: 6.3805, Min Logits: -2.1025
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 12it [00:05,  2.11it/s]


🔹 **Batch 12:**
   ✅ Avg Similarity Score: 0.5066
   ✅ Max Logits: 6.3996, Min Logits: -2.2942
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 13it [00:06,  2.11it/s]


🔹 **Batch 13:**
   ✅ Avg Similarity Score: 0.4292
   ✅ Max Logits: 6.4000, Min Logits: -2.6419
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 14it [00:06,  2.08it/s]


🔹 **Batch 14:**
   ✅ Avg Similarity Score: 0.3769
   ✅ Max Logits: 6.4000, Min Logits: -2.2943
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 15it [00:07,  2.08it/s]


🔹 **Batch 15:**
   ✅ Avg Similarity Score: 0.4552
   ✅ Max Logits: 6.3621, Min Logits: -2.1095
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 16it [00:07,  2.11it/s]


🔹 **Batch 16:**
   ✅ Avg Similarity Score: 0.4257
   ✅ Max Logits: 6.4000, Min Logits: -2.3688
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 17it [00:08,  2.09it/s]


🔹 **Batch 17:**
   ✅ Avg Similarity Score: 0.4176
   ✅ Max Logits: 6.3788, Min Logits: -2.7410
   ❌ Batch Classification Error: 29.6875%


Evaluating model: 18it [00:08,  2.10it/s]


🔹 **Batch 18:**
   ✅ Avg Similarity Score: 0.4321
   ✅ Max Logits: 6.3470, Min Logits: -2.6342
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 19it [00:09,  2.12it/s]


🔹 **Batch 19:**
   ✅ Avg Similarity Score: 0.4641
   ✅ Max Logits: 6.3997, Min Logits: -2.1657
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 20it [00:09,  2.12it/s]


🔹 **Batch 20:**
   ✅ Avg Similarity Score: 0.3772
   ✅ Max Logits: 6.3993, Min Logits: -2.1619
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 21it [00:10,  2.10it/s]


🔹 **Batch 21:**
   ✅ Avg Similarity Score: 0.4586
   ✅ Max Logits: 6.3984, Min Logits: -2.4592
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 22it [00:10,  2.11it/s]


🔹 **Batch 22:**
   ✅ Avg Similarity Score: 0.4995
   ✅ Max Logits: 6.3980, Min Logits: -2.0881
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 23it [00:11,  2.12it/s]


🔹 **Batch 23:**
   ✅ Avg Similarity Score: 0.4702
   ✅ Max Logits: 6.3919, Min Logits: -2.4256
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 24it [00:11,  2.10it/s]


🔹 **Batch 24:**
   ✅ Avg Similarity Score: 0.3631
   ✅ Max Logits: 6.3997, Min Logits: -2.4149
   ❌ Batch Classification Error: 29.6875%


Evaluating model: 25it [00:12,  2.09it/s]


🔹 **Batch 25:**
   ✅ Avg Similarity Score: 0.4266
   ✅ Max Logits: 6.3999, Min Logits: -2.1689
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 26it [00:12,  2.12it/s]


🔹 **Batch 26:**
   ✅ Avg Similarity Score: 0.4598
   ✅ Max Logits: 6.3946, Min Logits: -2.7202
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 27it [00:13,  2.13it/s]


🔹 **Batch 27:**
   ✅ Avg Similarity Score: 0.5393
   ✅ Max Logits: 6.3999, Min Logits: -2.1764
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 28it [00:13,  2.14it/s]


🔹 **Batch 28:**
   ✅ Avg Similarity Score: 0.4151
   ✅ Max Logits: 6.4000, Min Logits: -2.4254
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 29it [00:14,  2.13it/s]


🔹 **Batch 29:**
   ✅ Avg Similarity Score: 0.5520
   ✅ Max Logits: 6.3999, Min Logits: -2.4306
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 30it [00:14,  2.14it/s]


🔹 **Batch 30:**
   ✅ Avg Similarity Score: 0.4796
   ✅ Max Logits: 6.3913, Min Logits: -2.1211
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 31it [00:14,  2.13it/s]


🔹 **Batch 31:**
   ✅ Avg Similarity Score: 0.4737
   ✅ Max Logits: 6.3510, Min Logits: -2.4547
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 32it [00:15,  2.13it/s]


🔹 **Batch 32:**
   ✅ Avg Similarity Score: 0.4967
   ✅ Max Logits: 6.3507, Min Logits: -2.4904
   ❌ Batch Classification Error: 9.3750%


Evaluating model: 33it [00:15,  2.12it/s]


🔹 **Batch 33:**
   ✅ Avg Similarity Score: 0.3910
   ✅ Max Logits: 6.4000, Min Logits: -2.6054
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 34it [00:16,  2.13it/s]


🔹 **Batch 34:**
   ✅ Avg Similarity Score: 0.4716
   ✅ Max Logits: 6.3612, Min Logits: -2.3964
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 35it [00:16,  2.11it/s]


🔹 **Batch 35:**
   ✅ Avg Similarity Score: 0.5045
   ✅ Max Logits: 6.3964, Min Logits: -2.5605
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 36it [00:17,  2.13it/s]


🔹 **Batch 36:**
   ✅ Avg Similarity Score: 0.4235
   ✅ Max Logits: 6.3883, Min Logits: -2.6283
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 37it [00:17,  2.10it/s]


🔹 **Batch 37:**
   ✅ Avg Similarity Score: 0.4079
   ✅ Max Logits: 6.3675, Min Logits: -2.5240
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 38it [00:18,  2.10it/s]


🔹 **Batch 38:**
   ✅ Avg Similarity Score: 0.4725
   ✅ Max Logits: 6.3784, Min Logits: -2.8207
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 39it [00:18,  2.11it/s]


🔹 **Batch 39:**
   ✅ Avg Similarity Score: 0.4484
   ✅ Max Logits: 6.3761, Min Logits: -2.0732
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 40it [00:19,  2.10it/s]


🔹 **Batch 40:**
   ✅ Avg Similarity Score: 0.4087
   ✅ Max Logits: 6.3995, Min Logits: -2.6936
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 41it [00:19,  2.09it/s]


🔹 **Batch 41:**
   ✅ Avg Similarity Score: 0.4830
   ✅ Max Logits: 6.4000, Min Logits: -2.1431
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 42it [00:20,  2.08it/s]


🔹 **Batch 42:**
   ✅ Avg Similarity Score: 0.4483
   ✅ Max Logits: 6.4000, Min Logits: -2.1589
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 43it [00:20,  2.09it/s]


🔹 **Batch 43:**
   ✅ Avg Similarity Score: 0.4549
   ✅ Max Logits: 6.3472, Min Logits: -2.2048
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 44it [00:21,  2.06it/s]


🔹 **Batch 44:**
   ✅ Avg Similarity Score: 0.4720
   ✅ Max Logits: 6.3530, Min Logits: -2.1495
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 45it [00:21,  2.08it/s]


🔹 **Batch 45:**
   ✅ Avg Similarity Score: 0.4116
   ✅ Max Logits: 6.2967, Min Logits: -2.1750
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 46it [00:22,  2.09it/s]


🔹 **Batch 46:**
   ✅ Avg Similarity Score: 0.4485
   ✅ Max Logits: 6.3995, Min Logits: -2.2543
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 47it [00:22,  2.09it/s]


🔹 **Batch 47:**
   ✅ Avg Similarity Score: 0.4115
   ✅ Max Logits: 6.3354, Min Logits: -3.0199
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 48it [00:23,  2.11it/s]


🔹 **Batch 48:**
   ✅ Avg Similarity Score: 0.4609
   ✅ Max Logits: 6.3983, Min Logits: -2.1728
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 49it [00:23,  2.12it/s]


🔹 **Batch 49:**
   ✅ Avg Similarity Score: 0.3753
   ✅ Max Logits: 6.3392, Min Logits: -2.6491
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 50it [00:24,  2.10it/s]


🔹 **Batch 50:**
   ✅ Avg Similarity Score: 0.4582
   ✅ Max Logits: 6.3335, Min Logits: -2.8569
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 51it [00:24,  2.10it/s]


🔹 **Batch 51:**
   ✅ Avg Similarity Score: 0.4667
   ✅ Max Logits: 6.3998, Min Logits: -2.2853
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 52it [00:24,  2.11it/s]


🔹 **Batch 52:**
   ✅ Avg Similarity Score: 0.4297
   ✅ Max Logits: 6.3993, Min Logits: -2.3179
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 53it [00:25,  2.12it/s]


🔹 **Batch 53:**
   ✅ Avg Similarity Score: 0.4493
   ✅ Max Logits: 6.4000, Min Logits: -2.3235
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 54it [00:25,  2.10it/s]


🔹 **Batch 54:**
   ✅ Avg Similarity Score: 0.4698
   ✅ Max Logits: 6.4000, Min Logits: -2.6465
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 55it [00:26,  2.11it/s]


🔹 **Batch 55:**
   ✅ Avg Similarity Score: 0.3847
   ✅ Max Logits: 6.3684, Min Logits: -2.2703
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 56it [00:26,  2.11it/s]


🔹 **Batch 56:**
   ✅ Avg Similarity Score: 0.3768
   ✅ Max Logits: 6.3764, Min Logits: -3.9358
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 57it [00:27,  2.12it/s]


🔹 **Batch 57:**
   ✅ Avg Similarity Score: 0.4330
   ✅ Max Logits: 6.4000, Min Logits: -2.5774
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 58it [00:27,  2.12it/s]


🔹 **Batch 58:**
   ✅ Avg Similarity Score: 0.5293
   ✅ Max Logits: 6.2896, Min Logits: -2.9248
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 59it [00:28,  2.13it/s]


🔹 **Batch 59:**
   ✅ Avg Similarity Score: 0.3877
   ✅ Max Logits: 6.3911, Min Logits: -3.2361
   ❌ Batch Classification Error: 29.6875%


Evaluating model: 60it [00:28,  2.13it/s]


🔹 **Batch 60:**
   ✅ Avg Similarity Score: 0.4893
   ✅ Max Logits: 6.3951, Min Logits: -2.4845
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 61it [00:29,  2.12it/s]


🔹 **Batch 61:**
   ✅ Avg Similarity Score: 0.4073
   ✅ Max Logits: 6.3803, Min Logits: -2.3683
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 62it [00:29,  2.11it/s]


🔹 **Batch 62:**
   ✅ Avg Similarity Score: 0.5019
   ✅ Max Logits: 6.3826, Min Logits: -2.2940
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 63it [00:30,  2.11it/s]


🔹 **Batch 63:**
   ✅ Avg Similarity Score: 0.4315
   ✅ Max Logits: 6.3972, Min Logits: -2.4382
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 64it [00:30,  2.12it/s]


🔹 **Batch 64:**
   ✅ Avg Similarity Score: 0.4080
   ✅ Max Logits: 6.4000, Min Logits: -2.3909
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 65it [00:31,  2.14it/s]


🔹 **Batch 65:**
   ✅ Avg Similarity Score: 0.4717
   ✅ Max Logits: 6.3793, Min Logits: -2.3162
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 66it [00:31,  2.13it/s]


🔹 **Batch 66:**
   ✅ Avg Similarity Score: 0.3916
   ✅ Max Logits: 6.3719, Min Logits: -3.0951
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 67it [00:32,  2.13it/s]


🔹 **Batch 67:**
   ✅ Avg Similarity Score: 0.4281
   ✅ Max Logits: 6.2794, Min Logits: -2.2591
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 68it [00:32,  2.14it/s]


🔹 **Batch 68:**
   ✅ Avg Similarity Score: 0.3693
   ✅ Max Logits: 6.3968, Min Logits: -2.7778
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 69it [00:32,  2.11it/s]


🔹 **Batch 69:**
   ✅ Avg Similarity Score: 0.4886
   ✅ Max Logits: 6.3890, Min Logits: -2.6816
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 70it [00:33,  1.96it/s]


🔹 **Batch 70:**
   ✅ Avg Similarity Score: 0.4422
   ✅ Max Logits: 6.3948, Min Logits: -2.2096
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 71it [00:34,  2.01it/s]


🔹 **Batch 71:**
   ✅ Avg Similarity Score: 0.5341
   ✅ Max Logits: 6.3988, Min Logits: -2.4130
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 72it [00:34,  2.05it/s]


🔹 **Batch 72:**
   ✅ Avg Similarity Score: 0.4318
   ✅ Max Logits: 6.3318, Min Logits: -2.1073
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 73it [00:34,  2.06it/s]


🔹 **Batch 73:**
   ✅ Avg Similarity Score: 0.3599
   ✅ Max Logits: 6.4000, Min Logits: -2.3631
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 74it [00:35,  2.10it/s]


🔹 **Batch 74:**
   ✅ Avg Similarity Score: 0.3849
   ✅ Max Logits: 6.0363, Min Logits: -2.4789
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 75it [00:35,  2.11it/s]


🔹 **Batch 75:**
   ✅ Avg Similarity Score: 0.4783
   ✅ Max Logits: 6.3213, Min Logits: -2.2356
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 76it [00:36,  2.11it/s]


🔹 **Batch 76:**
   ✅ Avg Similarity Score: 0.4576
   ✅ Max Logits: 6.3864, Min Logits: -2.3821
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 77it [00:36,  2.13it/s]


🔹 **Batch 77:**
   ✅ Avg Similarity Score: 0.5150
   ✅ Max Logits: 6.3209, Min Logits: -2.9594
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 78it [00:37,  2.13it/s]


🔹 **Batch 78:**
   ✅ Avg Similarity Score: 0.4342
   ✅ Max Logits: 6.3485, Min Logits: -2.0968
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 79it [00:37,  2.13it/s]


🔹 **Batch 79:**
   ✅ Avg Similarity Score: 0.5512
   ✅ Max Logits: 6.3989, Min Logits: -2.2705
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 80it [00:38,  2.13it/s]


🔹 **Batch 80:**
   ✅ Avg Similarity Score: 0.4642
   ✅ Max Logits: 6.3661, Min Logits: -2.4331
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 81it [00:38,  2.14it/s]


🔹 **Batch 81:**
   ✅ Avg Similarity Score: 0.4503
   ✅ Max Logits: 6.3981, Min Logits: -2.5409
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 82it [00:39,  2.14it/s]


🔹 **Batch 82:**
   ✅ Avg Similarity Score: 0.3870
   ✅ Max Logits: 6.4000, Min Logits: -2.6036
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 83it [00:39,  2.13it/s]


🔹 **Batch 83:**
   ✅ Avg Similarity Score: 0.4680
   ✅ Max Logits: 6.3805, Min Logits: -2.3536
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 84it [00:40,  2.14it/s]


🔹 **Batch 84:**
   ✅ Avg Similarity Score: 0.4075
   ✅ Max Logits: 6.4000, Min Logits: -2.3470
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 85it [00:40,  2.11it/s]


🔹 **Batch 85:**
   ✅ Avg Similarity Score: 0.4483
   ✅ Max Logits: 6.4000, Min Logits: -2.3526
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 86it [00:41,  2.11it/s]


🔹 **Batch 86:**
   ✅ Avg Similarity Score: 0.4653
   ✅ Max Logits: 6.3758, Min Logits: -2.2432
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 87it [00:41,  2.10it/s]


🔹 **Batch 87:**
   ✅ Avg Similarity Score: 0.4758
   ✅ Max Logits: 6.4000, Min Logits: -2.7056
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 88it [00:42,  2.10it/s]


🔹 **Batch 88:**
   ✅ Avg Similarity Score: 0.4592
   ✅ Max Logits: 6.4000, Min Logits: -1.8967
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 89it [00:42,  2.11it/s]


🔹 **Batch 89:**
   ✅ Avg Similarity Score: 0.5070
   ✅ Max Logits: 6.3862, Min Logits: -2.2526
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 90it [00:42,  2.12it/s]


🔹 **Batch 90:**
   ✅ Avg Similarity Score: 0.5381
   ✅ Max Logits: 6.4000, Min Logits: -2.3022
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 91it [00:43,  2.13it/s]


🔹 **Batch 91:**
   ✅ Avg Similarity Score: 0.4490
   ✅ Max Logits: 6.4000, Min Logits: -2.2641
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 92it [00:43,  2.14it/s]


🔹 **Batch 92:**
   ✅ Avg Similarity Score: 0.4849
   ✅ Max Logits: 6.4000, Min Logits: -2.6440
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 93it [00:44,  2.14it/s]


🔹 **Batch 93:**
   ✅ Avg Similarity Score: 0.4767
   ✅ Max Logits: 6.4000, Min Logits: -2.4590
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 94it [00:44,  2.14it/s]


🔹 **Batch 94:**
   ✅ Avg Similarity Score: 0.4705
   ✅ Max Logits: 6.4000, Min Logits: -2.4214
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 95it [00:45,  2.10it/s]


🔹 **Batch 95:**
   ✅ Avg Similarity Score: 0.4595
   ✅ Max Logits: 6.3831, Min Logits: -2.6985
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 96it [00:45,  2.11it/s]


🔹 **Batch 96:**
   ✅ Avg Similarity Score: 0.5127
   ✅ Max Logits: 6.3960, Min Logits: -2.1188
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 97it [00:46,  2.13it/s]


🔹 **Batch 97:**
   ✅ Avg Similarity Score: 0.4067
   ✅ Max Logits: 6.4000, Min Logits: -2.9405
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 98it [00:46,  2.08it/s]


🔹 **Batch 98:**
   ✅ Avg Similarity Score: 0.4530
   ✅ Max Logits: 6.3998, Min Logits: -2.0795
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 99it [00:47,  2.11it/s]


🔹 **Batch 99:**
   ✅ Avg Similarity Score: 0.4317
   ✅ Max Logits: 6.3999, Min Logits: -2.3683
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 100it [00:47,  2.13it/s]


🔹 **Batch 100:**
   ✅ Avg Similarity Score: 0.5056
   ✅ Max Logits: 6.3478, Min Logits: -2.4063
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 101it [00:48,  2.12it/s]


🔹 **Batch 101:**
   ✅ Avg Similarity Score: 0.4217
   ✅ Max Logits: 6.3999, Min Logits: -2.8140
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 102it [00:48,  2.13it/s]


🔹 **Batch 102:**
   ✅ Avg Similarity Score: 0.4190
   ✅ Max Logits: 6.3999, Min Logits: -2.0822
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 103it [00:49,  2.12it/s]


🔹 **Batch 103:**
   ✅ Avg Similarity Score: 0.4996
   ✅ Max Logits: 6.3634, Min Logits: -2.2293
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 104it [00:49,  2.13it/s]


🔹 **Batch 104:**
   ✅ Avg Similarity Score: 0.3598
   ✅ Max Logits: 6.4000, Min Logits: -2.3927
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 105it [00:50,  2.14it/s]


🔹 **Batch 105:**
   ✅ Avg Similarity Score: 0.4702
   ✅ Max Logits: 6.3655, Min Logits: -2.2828
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 106it [00:50,  2.13it/s]


🔹 **Batch 106:**
   ✅ Avg Similarity Score: 0.4118
   ✅ Max Logits: 5.8574, Min Logits: -2.2213
   ❌ Batch Classification Error: 7.8125%


Evaluating model: 107it [00:50,  2.13it/s]


🔹 **Batch 107:**
   ✅ Avg Similarity Score: 0.4371
   ✅ Max Logits: 6.4000, Min Logits: -2.4698
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 108it [00:51,  2.14it/s]


🔹 **Batch 108:**
   ✅ Avg Similarity Score: 0.5176
   ✅ Max Logits: 6.3843, Min Logits: -2.4147
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 109it [00:51,  2.15it/s]


🔹 **Batch 109:**
   ✅ Avg Similarity Score: 0.3796
   ✅ Max Logits: 6.3997, Min Logits: -2.2866
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 110it [00:52,  2.14it/s]


🔹 **Batch 110:**
   ✅ Avg Similarity Score: 0.4138
   ✅ Max Logits: 6.3817, Min Logits: -2.3537
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 111it [00:52,  2.14it/s]


🔹 **Batch 111:**
   ✅ Avg Similarity Score: 0.4590
   ✅ Max Logits: 6.3239, Min Logits: -2.6874
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 112it [00:53,  2.14it/s]


🔹 **Batch 112:**
   ✅ Avg Similarity Score: 0.4625
   ✅ Max Logits: 6.3893, Min Logits: -2.3412
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 113it [00:53,  2.14it/s]


🔹 **Batch 113:**
   ✅ Avg Similarity Score: 0.4485
   ✅ Max Logits: 6.3996, Min Logits: -3.1772
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 114it [00:54,  2.14it/s]


🔹 **Batch 114:**
   ✅ Avg Similarity Score: 0.4656
   ✅ Max Logits: 6.3714, Min Logits: -2.4091
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 115it [00:54,  2.13it/s]


🔹 **Batch 115:**
   ✅ Avg Similarity Score: 0.4722
   ✅ Max Logits: 6.3988, Min Logits: -2.2611
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 116it [00:55,  2.15it/s]


🔹 **Batch 116:**
   ✅ Avg Similarity Score: 0.4327
   ✅ Max Logits: 6.2974, Min Logits: -2.3673
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 117it [00:55,  2.15it/s]


🔹 **Batch 117:**
   ✅ Avg Similarity Score: 0.4692
   ✅ Max Logits: 6.3055, Min Logits: -2.2804
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 118it [00:56,  2.13it/s]


🔹 **Batch 118:**
   ✅ Avg Similarity Score: 0.4780
   ✅ Max Logits: 6.4000, Min Logits: -2.5812
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 119it [00:56,  2.14it/s]


🔹 **Batch 119:**
   ✅ Avg Similarity Score: 0.4186
   ✅ Max Logits: 6.3592, Min Logits: -2.6691
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 120it [00:57,  2.13it/s]


🔹 **Batch 120:**
   ✅ Avg Similarity Score: 0.4564
   ✅ Max Logits: 6.3995, Min Logits: -2.7635
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 121it [00:57,  2.14it/s]


🔹 **Batch 121:**
   ✅ Avg Similarity Score: 0.4812
   ✅ Max Logits: 6.4000, Min Logits: -2.6031
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 122it [00:57,  2.12it/s]


🔹 **Batch 122:**
   ✅ Avg Similarity Score: 0.4727
   ✅ Max Logits: 6.3999, Min Logits: -2.6200
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 123it [00:58,  2.13it/s]


🔹 **Batch 123:**
   ✅ Avg Similarity Score: 0.4696
   ✅ Max Logits: 6.3997, Min Logits: -2.2265
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 124it [00:58,  2.15it/s]


🔹 **Batch 124:**
   ✅ Avg Similarity Score: 0.4709
   ✅ Max Logits: 6.3967, Min Logits: -2.2535
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 125it [00:59,  2.12it/s]


🔹 **Batch 125:**
   ✅ Avg Similarity Score: 0.3937
   ✅ Max Logits: 6.4000, Min Logits: -2.3853
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 126it [00:59,  2.13it/s]


🔹 **Batch 126:**
   ✅ Avg Similarity Score: 0.4511
   ✅ Max Logits: 6.2457, Min Logits: -2.5075
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 127it [01:00,  2.13it/s]


🔹 **Batch 127:**
   ✅ Avg Similarity Score: 0.4419
   ✅ Max Logits: 6.3787, Min Logits: -2.8457
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 128it [01:00,  2.11it/s]


🔹 **Batch 128:**
   ✅ Avg Similarity Score: 0.5139
   ✅ Max Logits: 6.3968, Min Logits: -2.1356
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 129it [01:01,  2.13it/s]


🔹 **Batch 129:**
   ✅ Avg Similarity Score: 0.4320
   ✅ Max Logits: 6.3377, Min Logits: -2.1563
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 130it [01:01,  2.12it/s]


🔹 **Batch 130:**
   ✅ Avg Similarity Score: 0.4095
   ✅ Max Logits: 6.3987, Min Logits: -2.3414
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 131it [01:02,  2.13it/s]


🔹 **Batch 131:**
   ✅ Avg Similarity Score: 0.4749
   ✅ Max Logits: 6.4000, Min Logits: -2.2299
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 132it [01:02,  2.14it/s]


🔹 **Batch 132:**
   ✅ Avg Similarity Score: 0.4890
   ✅ Max Logits: 6.3676, Min Logits: -2.1483
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 133it [01:03,  2.11it/s]


🔹 **Batch 133:**
   ✅ Avg Similarity Score: 0.5093
   ✅ Max Logits: 6.3996, Min Logits: -2.5442
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 134it [01:03,  2.12it/s]


🔹 **Batch 134:**
   ✅ Avg Similarity Score: 0.4813
   ✅ Max Logits: 6.3994, Min Logits: -2.3644
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 135it [01:04,  2.12it/s]


🔹 **Batch 135:**
   ✅ Avg Similarity Score: 0.4789
   ✅ Max Logits: 6.3727, Min Logits: -2.9154
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 136it [01:04,  2.12it/s]


🔹 **Batch 136:**
   ✅ Avg Similarity Score: 0.3853
   ✅ Max Logits: 6.1581, Min Logits: -2.1771
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 137it [01:05,  2.13it/s]


🔹 **Batch 137:**
   ✅ Avg Similarity Score: 0.4274
   ✅ Max Logits: 6.4000, Min Logits: -2.6699
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 138it [01:05,  2.11it/s]


🔹 **Batch 138:**
   ✅ Avg Similarity Score: 0.4075
   ✅ Max Logits: 6.3971, Min Logits: -2.3919
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 139it [01:05,  2.13it/s]


🔹 **Batch 139:**
   ✅ Avg Similarity Score: 0.5095
   ✅ Max Logits: 6.3707, Min Logits: -2.4333
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 140it [01:06,  2.15it/s]


🔹 **Batch 140:**
   ✅ Avg Similarity Score: 0.4155
   ✅ Max Logits: 6.3959, Min Logits: -2.3916
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 141it [01:06,  2.14it/s]


🔹 **Batch 141:**
   ✅ Avg Similarity Score: 0.4563
   ✅ Max Logits: 6.3999, Min Logits: -2.3530
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 142it [01:07,  2.14it/s]


🔹 **Batch 142:**
   ✅ Avg Similarity Score: 0.4910
   ✅ Max Logits: 6.4000, Min Logits: -2.2200
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 143it [01:07,  2.10it/s]


🔹 **Batch 143:**
   ✅ Avg Similarity Score: 0.4746
   ✅ Max Logits: 6.3998, Min Logits: -2.9361
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 144it [01:08,  2.09it/s]


🔹 **Batch 144:**
   ✅ Avg Similarity Score: 0.4762
   ✅ Max Logits: 6.4000, Min Logits: -3.0327
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 145it [01:08,  2.11it/s]


🔹 **Batch 145:**
   ✅ Avg Similarity Score: 0.4341
   ✅ Max Logits: 6.3888, Min Logits: -2.3819
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 146it [01:09,  2.10it/s]


🔹 **Batch 146:**
   ✅ Avg Similarity Score: 0.4672
   ✅ Max Logits: 6.2609, Min Logits: -2.1447
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 147it [01:09,  2.10it/s]


🔹 **Batch 147:**
   ✅ Avg Similarity Score: 0.4392
   ✅ Max Logits: 6.3853, Min Logits: -2.3102
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 148it [01:10,  2.12it/s]


🔹 **Batch 148:**
   ✅ Avg Similarity Score: 0.5576
   ✅ Max Logits: 6.4000, Min Logits: -2.2760
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 149it [01:10,  2.14it/s]


🔹 **Batch 149:**
   ✅ Avg Similarity Score: 0.5630
   ✅ Max Logits: 6.4000, Min Logits: -2.7350
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 150it [01:11,  2.14it/s]


🔹 **Batch 150:**
   ✅ Avg Similarity Score: 0.4801
   ✅ Max Logits: 6.3927, Min Logits: -2.4962
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 151it [01:11,  2.12it/s]


🔹 **Batch 151:**
   ✅ Avg Similarity Score: 0.4753
   ✅ Max Logits: 6.3541, Min Logits: -2.4480
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 152it [01:12,  2.12it/s]


🔹 **Batch 152:**
   ✅ Avg Similarity Score: 0.4699
   ✅ Max Logits: 6.3245, Min Logits: -2.4973
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 153it [01:12,  2.13it/s]


🔹 **Batch 153:**
   ✅ Avg Similarity Score: 0.4662
   ✅ Max Logits: 6.4000, Min Logits: -2.3652
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 154it [01:13,  2.13it/s]


🔹 **Batch 154:**
   ✅ Avg Similarity Score: 0.4466
   ✅ Max Logits: 6.3599, Min Logits: -2.4717
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 155it [01:13,  2.12it/s]


🔹 **Batch 155:**
   ✅ Avg Similarity Score: 0.4800
   ✅ Max Logits: 6.3999, Min Logits: -2.3237
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 156it [01:13,  2.12it/s]


🔹 **Batch 156:**
   ✅ Avg Similarity Score: 0.4073
   ✅ Max Logits: 6.3984, Min Logits: -2.4960
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 157it [01:14,  2.10it/s]


🔹 **Batch 157:**
   ✅ Avg Similarity Score: 0.4713
   ✅ Max Logits: 6.4000, Min Logits: -2.2380
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 158it [01:14,  2.12it/s]


🔹 **Batch 158:**
   ✅ Avg Similarity Score: 0.4205
   ✅ Max Logits: 6.4000, Min Logits: -2.8145
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 159it [01:15,  2.13it/s]


🔹 **Batch 159:**
   ✅ Avg Similarity Score: 0.4656
   ✅ Max Logits: 6.2939, Min Logits: -2.2723
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 160it [01:15,  2.13it/s]


🔹 **Batch 160:**
   ✅ Avg Similarity Score: 0.4726
   ✅ Max Logits: 6.3893, Min Logits: -2.3625
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 161it [01:16,  2.11it/s]


🔹 **Batch 161:**
   ✅ Avg Similarity Score: 0.4191
   ✅ Max Logits: 6.3718, Min Logits: -3.5203
   ❌ Batch Classification Error: 23.4375%


Evaluating model: 162it [01:16,  2.11it/s]


🔹 **Batch 162:**
   ✅ Avg Similarity Score: 0.4647
   ✅ Max Logits: 6.4000, Min Logits: -2.7100
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 163it [01:17,  2.12it/s]


🔹 **Batch 163:**
   ✅ Avg Similarity Score: 0.4549
   ✅ Max Logits: 6.3934, Min Logits: -2.3611
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 164it [01:17,  2.15it/s]


🔹 **Batch 164:**
   ✅ Avg Similarity Score: 0.4407
   ✅ Max Logits: 6.3871, Min Logits: -2.2163
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 165it [01:18,  2.14it/s]


🔹 **Batch 165:**
   ✅ Avg Similarity Score: 0.4279
   ✅ Max Logits: 6.4000, Min Logits: -2.7460
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 166it [01:18,  2.13it/s]


🔹 **Batch 166:**
   ✅ Avg Similarity Score: 0.5405
   ✅ Max Logits: 6.3813, Min Logits: -2.1395
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 167it [01:19,  2.12it/s]


🔹 **Batch 167:**
   ✅ Avg Similarity Score: 0.4223
   ✅ Max Logits: 6.3921, Min Logits: -2.1832
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 168it [01:19,  2.10it/s]


🔹 **Batch 168:**
   ✅ Avg Similarity Score: 0.4689
   ✅ Max Logits: 6.3964, Min Logits: -2.1720
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 169it [01:20,  2.12it/s]


🔹 **Batch 169:**
   ✅ Avg Similarity Score: 0.4340
   ✅ Max Logits: 6.3433, Min Logits: -2.1909
   ❌ Batch Classification Error: 28.1250%


Evaluating model: 170it [01:20,  2.13it/s]


🔹 **Batch 170:**
   ✅ Avg Similarity Score: 0.4452
   ✅ Max Logits: 6.3892, Min Logits: -2.3641
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 171it [01:21,  2.15it/s]


🔹 **Batch 171:**
   ✅ Avg Similarity Score: 0.4549
   ✅ Max Logits: 6.3293, Min Logits: -2.6578
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 172it [01:21,  2.12it/s]


🔹 **Batch 172:**
   ✅ Avg Similarity Score: 0.4896
   ✅ Max Logits: 6.3908, Min Logits: -2.1973
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 173it [01:22,  2.11it/s]


🔹 **Batch 173:**
   ✅ Avg Similarity Score: 0.4422
   ✅ Max Logits: 6.3556, Min Logits: -2.4481
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 174it [01:22,  2.12it/s]


🔹 **Batch 174:**
   ✅ Avg Similarity Score: 0.5087
   ✅ Max Logits: 6.3795, Min Logits: -2.4356
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 175it [01:22,  2.13it/s]


🔹 **Batch 175:**
   ✅ Avg Similarity Score: 0.4793
   ✅ Max Logits: 6.3429, Min Logits: -2.4025
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 176it [01:23,  2.11it/s]


🔹 **Batch 176:**
   ✅ Avg Similarity Score: 0.4839
   ✅ Max Logits: 6.3998, Min Logits: -2.2731
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 177it [01:23,  2.11it/s]


🔹 **Batch 177:**
   ✅ Avg Similarity Score: 0.4488
   ✅ Max Logits: 6.4000, Min Logits: -2.4267
   ❌ Batch Classification Error: 32.8125%


Evaluating model: 178it [01:24,  2.10it/s]


🔹 **Batch 178:**
   ✅ Avg Similarity Score: 0.4988
   ✅ Max Logits: 6.3999, Min Logits: -2.5456
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 179it [01:24,  2.10it/s]


🔹 **Batch 179:**
   ✅ Avg Similarity Score: 0.5765
   ✅ Max Logits: 6.4000, Min Logits: -2.2734
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 180it [01:25,  2.10it/s]


🔹 **Batch 180:**
   ✅ Avg Similarity Score: 0.3961
   ✅ Max Logits: 6.3549, Min Logits: -2.2302
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 181it [01:25,  2.10it/s]


🔹 **Batch 181:**
   ✅ Avg Similarity Score: 0.4033
   ✅ Max Logits: 6.3731, Min Logits: -2.4351
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 182it [01:26,  2.09it/s]


🔹 **Batch 182:**
   ✅ Avg Similarity Score: 0.4140
   ✅ Max Logits: 6.3437, Min Logits: -2.3979
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 183it [01:26,  2.08it/s]


🔹 **Batch 183:**
   ✅ Avg Similarity Score: 0.4801
   ✅ Max Logits: 6.4000, Min Logits: -2.2722
   ❌ Batch Classification Error: 31.2500%


Evaluating model: 184it [01:27,  2.10it/s]


🔹 **Batch 184:**
   ✅ Avg Similarity Score: 0.4514
   ✅ Max Logits: 6.3975, Min Logits: -2.2906
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 185it [01:27,  2.10it/s]


🔹 **Batch 185:**
   ✅ Avg Similarity Score: 0.4641
   ✅ Max Logits: 6.4000, Min Logits: -2.2202
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 186it [01:28,  2.11it/s]


🔹 **Batch 186:**
   ✅ Avg Similarity Score: 0.4966
   ✅ Max Logits: 6.4000, Min Logits: -2.2145
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 187it [01:28,  2.11it/s]


🔹 **Batch 187:**
   ✅ Avg Similarity Score: 0.4474
   ✅ Max Logits: 6.3821, Min Logits: -2.1776
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 188it [01:29,  2.12it/s]


🔹 **Batch 188:**
   ✅ Avg Similarity Score: 0.4444
   ✅ Max Logits: 6.4000, Min Logits: -2.5275
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 189it [01:29,  2.11it/s]


🔹 **Batch 189:**
   ✅ Avg Similarity Score: 0.4876
   ✅ Max Logits: 6.2890, Min Logits: -2.1369
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 190it [01:30,  2.12it/s]


🔹 **Batch 190:**
   ✅ Avg Similarity Score: 0.4589
   ✅ Max Logits: 6.4000, Min Logits: -2.2137
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 191it [01:30,  2.13it/s]


🔹 **Batch 191:**
   ✅ Avg Similarity Score: 0.4595
   ✅ Max Logits: 6.3981, Min Logits: -3.2786
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 192it [01:31,  2.13it/s]


🔹 **Batch 192:**
   ✅ Avg Similarity Score: 0.5492
   ✅ Max Logits: 6.4000, Min Logits: -2.2731
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 193it [01:31,  2.13it/s]


🔹 **Batch 193:**
   ✅ Avg Similarity Score: 0.4475
   ✅ Max Logits: 6.3977, Min Logits: -2.6029
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 194it [01:31,  2.14it/s]


🔹 **Batch 194:**
   ✅ Avg Similarity Score: 0.4253
   ✅ Max Logits: 6.4000, Min Logits: -2.3790
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 195it [01:32,  2.12it/s]


🔹 **Batch 195:**
   ✅ Avg Similarity Score: 0.4542
   ✅ Max Logits: 6.1483, Min Logits: -2.4116
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 196it [01:32,  2.12it/s]


🔹 **Batch 196:**
   ✅ Avg Similarity Score: 0.4093
   ✅ Max Logits: 6.3722, Min Logits: -2.2624
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 197it [01:33,  2.12it/s]


🔹 **Batch 197:**
   ✅ Avg Similarity Score: 0.4440
   ✅ Max Logits: 6.3782, Min Logits: -2.4141
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 198it [01:33,  2.13it/s]


🔹 **Batch 198:**
   ✅ Avg Similarity Score: 0.4671
   ✅ Max Logits: 6.4000, Min Logits: -2.4353
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 199it [01:34,  2.15it/s]


🔹 **Batch 199:**
   ✅ Avg Similarity Score: 0.5193
   ✅ Max Logits: 6.4000, Min Logits: -2.1069
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 200it [01:34,  2.15it/s]


🔹 **Batch 200:**
   ✅ Avg Similarity Score: 0.4426
   ✅ Max Logits: 6.2969, Min Logits: -2.6217
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 201it [01:35,  2.14it/s]


🔹 **Batch 201:**
   ✅ Avg Similarity Score: 0.4128
   ✅ Max Logits: 6.3717, Min Logits: -2.3543
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 202it [01:35,  2.13it/s]


🔹 **Batch 202:**
   ✅ Avg Similarity Score: 0.3717
   ✅ Max Logits: 6.3606, Min Logits: -2.3567
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 203it [01:36,  2.13it/s]


🔹 **Batch 203:**
   ✅ Avg Similarity Score: 0.3287
   ✅ Max Logits: 6.3954, Min Logits: -2.5589
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 204it [01:36,  2.11it/s]


🔹 **Batch 204:**
   ✅ Avg Similarity Score: 0.4172
   ✅ Max Logits: 6.4000, Min Logits: -2.1084
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 205it [01:37,  2.11it/s]


🔹 **Batch 205:**
   ✅ Avg Similarity Score: 0.5343
   ✅ Max Logits: 6.4000, Min Logits: -2.9100
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 206it [01:37,  2.13it/s]


🔹 **Batch 206:**
   ✅ Avg Similarity Score: 0.5194
   ✅ Max Logits: 6.4000, Min Logits: -2.1541
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 207it [01:38,  2.13it/s]


🔹 **Batch 207:**
   ✅ Avg Similarity Score: 0.4850
   ✅ Max Logits: 6.4000, Min Logits: -2.1833
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 208it [01:38,  2.15it/s]


🔹 **Batch 208:**
   ✅ Avg Similarity Score: 0.4907
   ✅ Max Logits: 6.3998, Min Logits: -2.6063
   ❌ Batch Classification Error: 26.5625%


Evaluating model: 209it [01:38,  2.13it/s]


🔹 **Batch 209:**
   ✅ Avg Similarity Score: 0.4660
   ✅ Max Logits: 6.3707, Min Logits: -2.1846
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 210it [01:39,  2.13it/s]


🔹 **Batch 210:**
   ✅ Avg Similarity Score: 0.4001
   ✅ Max Logits: 6.4000, Min Logits: -2.3652
   ❌ Batch Classification Error: 29.6875%


Evaluating model: 211it [01:39,  2.12it/s]


🔹 **Batch 211:**
   ✅ Avg Similarity Score: 0.4718
   ✅ Max Logits: 6.3492, Min Logits: -2.1242
   ❌ Batch Classification Error: 10.9375%


Evaluating model: 212it [01:40,  2.13it/s]


🔹 **Batch 212:**
   ✅ Avg Similarity Score: 0.5909
   ✅ Max Logits: 6.4000, Min Logits: -2.3261
   ❌ Batch Classification Error: 4.6875%


Evaluating model: 213it [01:40,  2.11it/s]


🔹 **Batch 213:**
   ✅ Avg Similarity Score: 0.5796
   ✅ Max Logits: 6.4000, Min Logits: -2.4383
   ❌ Batch Classification Error: 31.2500%


Evaluating model: 214it [01:41,  2.12it/s]


🔹 **Batch 214:**
   ✅ Avg Similarity Score: 0.5029
   ✅ Max Logits: 6.4000, Min Logits: -2.3945
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 215it [01:41,  2.13it/s]


🔹 **Batch 215:**
   ✅ Avg Similarity Score: 0.4485
   ✅ Max Logits: 6.4000, Min Logits: -2.2507
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 216it [01:42,  2.14it/s]


🔹 **Batch 216:**
   ✅ Avg Similarity Score: 0.4544
   ✅ Max Logits: 6.4000, Min Logits: -2.1173
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 217it [01:42,  2.14it/s]


🔹 **Batch 217:**
   ✅ Avg Similarity Score: 0.4245
   ✅ Max Logits: 6.3415, Min Logits: -2.3803
   ❌ Batch Classification Error: 18.7500%


Evaluating model: 218it [01:43,  2.12it/s]


🔹 **Batch 218:**
   ✅ Avg Similarity Score: 0.5308
   ✅ Max Logits: 6.4000, Min Logits: -2.2233
   ❌ Batch Classification Error: 21.8750%


Evaluating model: 219it [01:43,  2.14it/s]


🔹 **Batch 219:**
   ✅ Avg Similarity Score: 0.4913
   ✅ Max Logits: 6.3971, Min Logits: -2.0977
   ❌ Batch Classification Error: 15.6250%


Evaluating model: 220it [01:44,  2.15it/s]


🔹 **Batch 220:**
   ✅ Avg Similarity Score: 0.3440
   ✅ Max Logits: 6.3967, Min Logits: -2.2525
   ❌ Batch Classification Error: 17.1875%


Evaluating model: 221it [01:44,  2.15it/s]


🔹 **Batch 221:**
   ✅ Avg Similarity Score: 0.3840
   ✅ Max Logits: 6.3789, Min Logits: -2.9579
   ❌ Batch Classification Error: 25.0000%


Evaluating model: 222it [01:45,  2.11it/s]


🔹 **Batch 222:**
   ✅ Avg Similarity Score: 0.4764
   ✅ Max Logits: 6.1836, Min Logits: -2.3722
   ❌ Batch Classification Error: 20.3125%


Evaluating model: 223it [01:45,  2.11it/s]


🔹 **Batch 223:**
   ✅ Avg Similarity Score: 0.3960
   ✅ Max Logits: 6.2612, Min Logits: -2.2133
   ❌ Batch Classification Error: 12.5000%


Evaluating model: 224it [01:46,  2.13it/s]


🔹 **Batch 224:**
   ✅ Avg Similarity Score: 0.5454
   ✅ Max Logits: 6.3799, Min Logits: -2.6398
   ❌ Batch Classification Error: 14.0625%


Evaluating model: 225it [01:46,  2.11it/s]


🔹 **Batch 225:**
   ✅ Avg Similarity Score: 0.3744
   ✅ Max Logits: 6.3509, Min Logits: -2.2505
   ❌ Batch Classification Error: 20.3125%

🔹 **Final Model Evaluation:**
   🔹 Total Samples Processed: 14400
   🔹 Total Batches: 225
   ✅ Overall Classification Error: 19.1875%





In [None]:

import json
from safetensors.torch import load_file
import torch
from transformers import AutoConfig, AutoTokenizer, Trainer, TrainingArguments, AdamW, TrainerState
from transformers import get_linear_schedule_with_warmup

# ✅ 1. Load model config
model_checkpoint = "/content/drive/MyDrive/new_siamese_model/checkpoint-20000"
config = AutoConfig.from_pretrained(model_checkpoint)
model = SiameseModel(config)

# ✅ 2. Load model weights from `safetensors`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_weights = load_file(f"{model_checkpoint}/model.safetensors", device="cpu")

# ✅ 3. Apply weights correctly
missing_keys, unexpected_keys = model.load_state_dict(model_weights, strict=False)
print(f"🔹 Missing keys: {missing_keys}")  # Should be empty if properly loaded
print(f"🔹 Unexpected keys: {unexpected_keys}")  # Should be empty if properly loaded

# ✅ Define warm-up steps (first 1000 batches)
num_warmup_steps = 200  # Gradually increase LR over 600 batches

# ✅ 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/new_siamese_model",
    per_device_train_batch_size=128,  # ✅ Using larger batch size
    per_device_eval_batch_size=128,
    num_train_epochs=2,
    max_steps=50000,
    fp16=True,  # ✅ Enables mixed precision training
    save_strategy="steps",
    logging_strategy="steps",
    logging_steps=200,
    save_steps=1000,
    load_best_model_at_end=False,
    report_to="none",
    logging_first_step=True
)


# ✅ 7. Reset optimizer (IGNORE previous optimizer state)
optimizer = AdamW([
    {"params": model.roberta.parameters(), "lr": 2e-5, "weight_decay": 0.0001},  # RoBERTa layers
    {"params": model.net.parameters(), "lr": 5e-5, "weight_decay": 0.001},  # Custom layers
], eps=1e-8)
# ✅ Learning rate scheduler with warm-up

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=40000
)

# ✅ 9. Initialize Trainer (Manually set optimizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # ✅ Now only evaluates on 1000 samples!
    data_collator=siamese_data_collator,
    optimizers=(optimizer, lr_scheduler),
)


trainer.train()  # 🚀 Ignore optimizer mismatch