In [1]:
%%writefile train_on_gpu_0_gemma3.py

import os
import pandas as pd
import torch
import datetime
from collections import defaultdict
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
from transformers.utils import is_torch_bf16_gpu_available
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from trl import DataCollatorForCompletionOnlyLM
from tqdm import tqdm
import torch.nn.functional as F
import random

# ============================================================================
# Configuration
# ============================================================================
MODEL_NAME = "/kaggle/input/llm-gemma3/gemma-3-transformers-gemma-3-1b-it-v1"
DATA_PATH_TRAIN = "/kaggle/input/jigsaw-agile-community-rules/train.csv"
DATA_PATH_TEST = "/kaggle/input/jigsaw-agile-community-rules/test.csv"

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
MODEL_OUTPUT_PATH = f"./lora_checkpoint_{timestamp}"

# Training hyperparameters
EPOCH = 2
LR = 1e-4
TRAIN_BS = 1
GRAD_ACC_NUM = 8
SEED = 42

# Dataset parameters
NUM_POS_EXAMPLES = 3
NUM_NEG_EXAMPLES = 3

# Inference parameters
BATCH_SIZE = 16

# ============================================================================
# Dataset Building
# ============================================================================
def build_training_dataset_dict(data_path_train: str, data_path_test: str, seed: int = 42):
    """
    Build training dataset from:
    1. Examples from test.csv (positive_example_1, positive_example_2, negative_example_1, negative_example_2)
    2. Examples AND nonexamples from train.csv
    
    Returns dict with structure:
    {
        rule: {
            'positive_example': list of texts that violate the rule,
            'negative_example': list of texts that don't violate the rule
        }
    }
    """
    random.seed(seed)
    
    train_df = pd.read_csv(data_path_train)
    test_df = pd.read_csv(data_path_test)
    
    dataset_dict = defaultdict(lambda: {
        'positive_example': set(),
        'negative_example': set()
    })
    
    # Process train.csv - both examples and nonexamples
    print("Processing train.csv...")
    for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
        rule = row['rule']
        
        # Add body with its rule_violation label
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        if body:
            if row['rule_violation'] == 1:
                dataset_dict[rule]['positive_example'].add(body)
            else:
                dataset_dict[rule]['negative_example'].add(body)
        
        # Add explicit examples
        for i in range(1, 3):
            pos_col = f'positive_example_{i}'
            if pos_col in train_df.columns:
                text = str(row[pos_col]).strip() if pd.notna(row[pos_col]) else ""
                if text:
                    dataset_dict[rule]['positive_example'].add(text)
            
            neg_col = f'negative_example_{i}'
            if neg_col in train_df.columns:
                text = str(row[neg_col]).strip() if pd.notna(row[neg_col]) else ""
                if text:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Process test.csv - only explicit examples
    print("Processing test.csv examples...")
    for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
        rule = row['rule']
        
        for i in range(1, 3):
            pos_col = f'positive_example_{i}'
            if pos_col in test_df.columns:
                text = str(row[pos_col]).strip() if pd.notna(row[pos_col]) else ""
                if text:
                    dataset_dict[rule]['positive_example'].add(text)
            
            neg_col = f'negative_example_{i}'
            if neg_col in test_df.columns:
                text = str(row[neg_col]).strip() if pd.notna(row[neg_col]) else ""
                if text:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Convert sets to lists
    result_dict = {}
    for rule, value in dataset_dict.items():
        result_dict[rule] = {
            'positive_example': list(value['positive_example']),
            'negative_example': list(value['negative_example'])
        }
    
    return result_dict


def build_inference_data(data_path_test: str):
    """
    Build inference data from test.csv body column (nonexamples)
    
    Returns list of dicts with:
    [
        {
            'row_id': row_id,
            'rule': rule text,
            'text': body text
        }
    ]
    """
    test_df = pd.read_csv(data_path_test)
    
    inference_data = []
    for _, row in test_df.iterrows():
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        if body:
            inference_data.append({
                'row_id': row['row_id'],
                'rule': row['rule'],
                'text': body
            })
    
    return inference_data


# ============================================================================
# Training Dataset
# ============================================================================
class TrainingDataset(Dataset):
    """
    Training dataset that creates prompts with examples and labels.
    """
    
    def __init__(
        self, 
        dataset_dict,
        tokenizer,
        num_pos_examples: int = 2,
        num_neg_examples: int = 2,
        sys_prompt: str = "You are a content moderator for Reddit. Your task is to determine if comments violate community rules. Use the examples as guidance. Answer only 'Yes' if it violates the rule, or 'No' if it doesn't.",
        seed: int = None
    ):
        self.dataset_dict = dataset_dict
        self.tokenizer = tokenizer
        self.num_pos_examples = num_pos_examples
        self.num_neg_examples = num_neg_examples
        self.sys_prompt = sys_prompt
        
        if seed is not None:
            random.seed(seed)
        
        # Create training examples
        self.examples = []
        
        for rule, categories in dataset_dict.items():
            pos_examples = categories['positive_example']
            neg_examples = categories['negative_example']
            
            # Add positive examples as training targets
            for text in pos_examples:
                self.examples.append({
                    'rule': rule,
                    'text': text,
                    'label': 1,
                    'pos_pool': [p for p in pos_examples if p != text],
                    'neg_pool': neg_examples
                })
            
            # Add negative examples as training targets
            for text in neg_examples:
                self.examples.append({
                    'rule': rule,
                    'text': text,
                    'label': 0,
                    'pos_pool': pos_examples,
                    'neg_pool': [n for n in neg_examples if n != text]
                })
    
    def __len__(self):
        return len(self.examples)
    
    def _build_prompt_text(self, example, sampled_pos_examples, sampled_neg_examples):
        """Build prompt with rule, examples, and text to classify"""
        prompt_parts = [
            self.sys_prompt,
            "",
            f"Rule: \"{example['rule']}\"",
            ""
        ]
        
        if sampled_pos_examples:
            prompt_parts.append("Here are examples of comments that VIOLATE this rule:")
            for i, pos_ex in enumerate(sampled_pos_examples, 1):
                prompt_parts.append(f"{i}. {pos_ex}")
            prompt_parts.append("")
        
        if sampled_neg_examples:
            prompt_parts.append("Here are examples of comments that DO NOT violate this rule:")
            for i, neg_ex in enumerate(sampled_neg_examples, 1):
                prompt_parts.append(f"{i}. {neg_ex}")
            prompt_parts.append("")
        
        prompt_parts.extend([
            "Now, evaluate this comment:",
            f"\"{example['text']}\"",
            "",
            "Does it violate the rule?"
        ])
        
        return "\n".join(prompt_parts)
    
    def __getitem__(self, idx):
        example = self.examples[idx]
        
        # Sample random examples
        num_pos_to_sample = min(self.num_pos_examples, len(example['pos_pool']))
        sampled_pos_examples = random.sample(example['pos_pool'], num_pos_to_sample) if num_pos_to_sample > 0 else []
        
        num_neg_to_sample = min(self.num_neg_examples, len(example['neg_pool']))
        sampled_neg_examples = random.sample(example['neg_pool'], num_neg_to_sample) if num_neg_to_sample > 0 else []
        
        # Build prompt
        user_content = self._build_prompt_text(example, sampled_pos_examples, sampled_neg_examples)
        
        messages = [{"role": "user", "content": user_content}]
        
        prompt = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        
        label_text = " Yes" if example['label'] == 1 else " No"
        full_text = prompt + label_text
        
        tokenized = self.tokenizer(full_text, add_special_tokens=False, truncation=False)
        
        return {"input_ids": tokenized["input_ids"]}


class ClassifyDataset(Dataset):
    """Wrapper for Trainer compatibility"""
    def __init__(self, base_dataset):
        self.base_dataset = base_dataset

    def __len__(self):
        return len(self.base_dataset)

    def __getitem__(self, index):
        item = self.base_dataset[index]
        return {"input_ids": item["input_ids"]}


# ============================================================================
# Inference Functions
# ============================================================================
def get_yes_no_probabilities(model, tokenizer, prompts):
    """Get probability scores for Yes/No tokens"""
    
    yes_tokens = tokenizer.encode(" Yes", add_special_tokens=False)
    no_tokens = tokenizer.encode(" No", add_special_tokens=False)
    
    yes_token_id = yes_tokens[0]
    no_token_id = no_tokens[0]
    
    inputs = tokenizer(
        prompts, 
        return_tensors="pt", 
        padding=True, 
        truncation=True
    ).to(model.device)
    
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    
    last_token_logits = logits[:, -1, :]
    
    yes_logits = last_token_logits[:, yes_token_id]
    no_logits = last_token_logits[:, no_token_id]
    
    yes_no_logits = torch.stack([no_logits, yes_logits], dim=1)
    probabilities = F.softmax(yes_no_logits, dim=1)
    
    yes_probs = probabilities[:, 1].cpu().numpy()
    return yes_probs


def build_inference_prompt(rule, text, pos_examples, neg_examples, sys_prompt, tokenizer, num_pos=2, num_neg=2):
    """Build prompt for inference"""
    # Sample examples
    sampled_pos = random.sample(pos_examples, min(num_pos, len(pos_examples))) if pos_examples else []
    sampled_neg = random.sample(neg_examples, min(num_neg, len(neg_examples))) if neg_examples else []
    
    prompt_parts = [
        sys_prompt,
        "",
        f"Rule: \"{rule}\"",
        ""
    ]
    
    if sampled_pos:
        prompt_parts.append("Here are examples of comments that VIOLATE this rule:")
        for i, pos_ex in enumerate(sampled_pos, 1):
            prompt_parts.append(f"{i}. {pos_ex}")
        prompt_parts.append("")
    
    if sampled_neg:
        prompt_parts.append("Here are examples of comments that DO NOT violate this rule:")
        for i, neg_ex in enumerate(sampled_neg, 1):
            prompt_parts.append(f"{i}. {neg_ex}")
        prompt_parts.append("")
    
    prompt_parts.extend([
        "Now, evaluate this comment:",
        f"\"{text}\"",
        "",
        "Does it violate the rule?"
    ])
    
    user_content = "\n".join(prompt_parts)
    messages = [{"role": "user", "content": user_content}]
    
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    
    return prompt


# ============================================================================
# Main Script
# ============================================================================

print("=" * 80)
print("Gemma-3 Training and Inference Pipeline")
print("=" * 80)

# ------------------------------------------------------------------------
# Step 1: Build dataset
# ------------------------------------------------------------------------
print("\n[Step 1/5] Building training dataset...")
dataset_dict = build_training_dataset_dict(DATA_PATH_TRAIN, DATA_PATH_TEST, seed=SEED)
print(f"Total rules: {len(dataset_dict)}")

total_pos = sum(len(v['positive_example']) for v in dataset_dict.values())
total_neg = sum(len(v['negative_example']) for v in dataset_dict.values())
print(f"Total positive examples: {total_pos}")
print(f"Total negative examples: {total_neg}")

# ------------------------------------------------------------------------
# Step 2: Initialize tokenizer
# ------------------------------------------------------------------------
print("\n[Step 2/5] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.padding_side = "left"
print(f"Tokenizer loaded: {MODEL_NAME}")

# ------------------------------------------------------------------------
# Step 3: Create training dataset
# ------------------------------------------------------------------------
print("\n[Step 3/5] Creating training dataset...")
train_base_dataset = TrainingDataset(
    dataset_dict,
    tokenizer=tokenizer,
    num_pos_examples=NUM_POS_EXAMPLES,
    num_neg_examples=NUM_NEG_EXAMPLES,
    seed=SEED
)
train_dataset = ClassifyDataset(train_base_dataset)
print(f"Training dataset size: {len(train_dataset)}")

data_collator = DataCollatorForCompletionOnlyLM("Does it violate the rule?", tokenizer=tokenizer)

# ------------------------------------------------------------------------
# Step 4: Train model
# ------------------------------------------------------------------------
print("\n[Step 4/5] Training model...")
os.makedirs(MODEL_OUTPUT_PATH, exist_ok=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation='eager'
)
print(f"Model loaded: {MODEL_NAME}")

# Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
    bias='none',
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

training_args = TrainingArguments(
    output_dir=MODEL_OUTPUT_PATH,
    logging_steps=1,
    logging_strategy="steps",
    eval_strategy="no",
    save_strategy="epoch",
    num_train_epochs=EPOCH,
    optim="paged_adamw_8bit",
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    learning_rate=LR,
    weight_decay=0.01,
    bf16=is_torch_bf16_gpu_available(),
    fp16=not is_torch_bf16_gpu_available(),
    per_device_train_batch_size=TRAIN_BS,
    gradient_accumulation_steps=GRAD_ACC_NUM,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    group_by_length=False,
    report_to="none",
    seed=SEED,
    remove_unused_columns=False,
)

trainer = Trainer(
    model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

print("\nStarting training...")
trainer_output = trainer.train()

print("\nTraining complete!")
print(f"Final training loss: {trainer_output.training_loss:.4f}")
print(f"Saving model to: {MODEL_OUTPUT_PATH}")
trainer.save_model(MODEL_OUTPUT_PATH)

# ------------------------------------------------------------------------
# Step 5: Inference
# ------------------------------------------------------------------------
print("\n[Step 5/5] Running inference on test.csv...")

# Load inference data
inference_data = build_inference_data(DATA_PATH_TEST)
print(f"Total inference samples: {len(inference_data)}")

# Load trained model
print("Loading trained model...")
device = torch.device("cuda:0")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
).to(device)

trained_model = PeftModel.from_pretrained(base_model, MODEL_OUTPUT_PATH)
trained_model.eval()

# Prepare prompts
print("Preparing prompts...")
sys_prompt = "You are a content moderator for Reddit. Your task is to determine if comments violate community rules. Use the examples as guidance. Answer only 'Yes' if it violates the rule, or 'No' if it doesn't."

all_prompts = []
all_row_ids = []

for item in tqdm(inference_data):
    rule = item['rule']
    text = item['text']
    row_id = item['row_id']
    
    # Get examples for this rule
    pos_examples = dataset_dict.get(rule, {}).get('positive_example', [])
    neg_examples = dataset_dict.get(rule, {}).get('negative_example', [])
    
    prompt = build_inference_prompt(
        rule, text, pos_examples, neg_examples, 
        sys_prompt, tokenizer, NUM_POS_EXAMPLES, NUM_NEG_EXAMPLES
    )
    
    all_prompts.append(prompt)
    all_row_ids.append(row_id)

# Run inference in batches
print("Running inference...")
all_probabilities = []

with torch.no_grad():
    for i in tqdm(range(0, len(all_prompts), BATCH_SIZE)):
        batch_prompts = all_prompts[i:i + BATCH_SIZE]
        batch_probs = get_yes_no_probabilities(trained_model, tokenizer, batch_prompts)
        all_probabilities.extend(batch_probs)

# Create submission
submission_df = pd.DataFrame({
    'row_id': all_row_ids,
    'rule_violation': all_probabilities
})

submission_df.to_csv('submission1.csv', index=False)
print("\n" + "=" * 80)
print("Pipeline complete!")
print("=" * 80)
print(f"Submission saved to: submission1.csv")
print(f"Total predictions: {len(submission_df)}")
print(f"\nSample predictions:")
print(submission_df.head(10))

Writing train_on_gpu_0_gemma3.py


In [2]:
%%writefile train_on_gpu_0_bert.py

import os
import torch
import numpy as np
import pandas as pd
from datetime import datetime
from collections import defaultdict
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.optim import AdamW

class Config:
    model_name_or_path = r"/kaggle/input/huggingfacedebertav3variants/deberta-v3-base"
    num_train_epochs = 3
    learning_rate = 2e-5
    per_device_train_batch_size = 24
    per_device_eval_batch_size = 64
    data_path = "/kaggle/input/jigsaw-agile-community-rules"
    max_length = 512
    warmup_ratio = 0.1
    weight_decay = 0.01
    seed = 42
    logging_steps = 50
    eval_ratio = 0.1  # Use 10% of training data for validation

# Dataset Classes
class JigsawBertDataset(torch.utils.data.Dataset):
    """
    PyTorch Dataset that tokenizes during initialization.
    Includes both example and nonexample texts for training.
    
    Combines rule and text with [SEP] token and returns tokenized tensors.
    
    Special labeling:
    - Positive examples/nonexamples: label = 1
    - Negative examples containing "http": label = 0.25
    - Negative examples without "http": label = 0
    - Negative nonexamples (from body): always label = 0
    
    Args:
        dataset_dict: Dictionary with rule keys and category keys
        tokenizer: Tokenizer to use for encoding texts
        max_length: Maximum sequence length for tokenization
    """
    
    def __init__(self, dataset_dict: dict, tokenizer, max_length: int = 512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.labels = []
        
        # Flatten the dataset_dict and create input texts
        input_texts = []
        for rule, categories in dataset_dict.items():
            # Add positive examples (label=1)
            for text in categories['positive_example']:
                input_text = f"{rule}[SEP]{text}"
                input_texts.append(input_text)
                self.labels.append(1)

            # Add positive nonexamples (label=1)
            for text in categories['positive_nonexample']:
                input_text = f"{rule}[SEP]{text}"
                input_texts.append(input_text)
                self.labels.append(1)
            
            # Add negative examples (label=0 or 0.25 if contains http)
            for text in categories['negative_example']:
                input_text = f"{rule}[SEP]{text}"
                input_texts.append(input_text)
                # Check if text contains "http"
                label = 0.25 if 'http' in text.lower() else 0
                self.labels.append(label)
            
            # Add negative nonexamples (always label=0, from body text)
            for text in categories['negative_nonexample']:
                input_text = f"{rule}[SEP]{text}"
                input_texts.append(input_text)
                self.labels.append(0)
        
        # Tokenize all texts at once
        self.encodings = tokenizer(
            input_texts,
            truncation=True,
            padding=True,
            max_length=max_length,
            return_tensors='pt'
        )

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        if self.labels:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item


def set_seed(seed):
    torch.manual_seed(seed)
    np.random.seed(seed)


class InferenceDataset(torch.utils.data.Dataset):
    """
    Dataset for inference with only body texts.
    """
    def __init__(self, inference_data: dict, tokenizer, max_length: int = 512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.row_ids = []
        
        input_texts = []
        for row_id, data in inference_data.items():
            rule = data['rule']
            text = data['body']
            
            input_text = f"{rule}[SEP]{text}"
            input_texts.append(input_text)
            self.row_ids.append(row_id)
        
        self.encodings = tokenizer(
            input_texts,
            truncation=True,
            padding=True,
            max_length=max_length,
            return_tensors='pt'
        )
    
    def __len__(self):
        return len(self.row_ids)
    
    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        return item


# Data Building Functions
def build_training_dataset(
    data_path_train: str,
    data_path_test: str
) -> dict:
    """
    Build dataset for training that includes:
    - All examples and non-examples from train.csv
    - All examples from test.csv
    
    Examples are grouped by rule only (subreddit is ignored).
    Duplicates are removed across all subreddits for the same rule.
    
    Returns:
        Dictionary with rule keys
    """
    train_df = pd.read_csv(data_path_train)
    test_df = pd.read_csv(data_path_test)
    
    dataset_dict = defaultdict(lambda: {
        'positive_example': set(),
        'positive_nonexample': set(),
        'negative_example': set(),
        'negative_nonexample': set()
    })
    
    # Process train.csv - add all examples and non-examples
    for _, row in tqdm(train_df.iterrows(), total=train_df.shape[0], desc="Processing train.csv"):
        rule = row['rule']
        
        # Add body with its rule_violation label as nonexample
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        if body and len(body) > 0:
            label_key = 'positive_nonexample' if row['rule_violation'] == 1 else 'negative_nonexample'
            dataset_dict[rule][label_key].add(body)
        
        # Add positive examples (1, 2)
        for i in range(1, 3):
            col_name = f'positive_example_{i}'
            if col_name in train_df.columns:
                text = str(row[col_name]).strip() if pd.notna(row[col_name]) else ""
                if text and len(text) > 0:
                    dataset_dict[rule]['positive_example'].add(text)
        
        # Add negative examples (1, 2)
        for i in range(1, 3):
            col_name = f'negative_example_{i}'
            if col_name in train_df.columns:
                text = str(row[col_name]).strip() if pd.notna(row[col_name]) else ""
                if text and len(text) > 0:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Process test.csv - add only explicit examples
    for _, row in tqdm(test_df.iterrows(), total=test_df.shape[0], desc="Processing test.csv"):
        rule = row['rule']
        
        # Add positive examples (1, 2)
        for i in range(1, 3):
            col_name = f'positive_example_{i}'
            if col_name in test_df.columns:
                text = str(row[col_name]).strip() if pd.notna(row[col_name]) else ""
                if text and len(text) > 0:
                    dataset_dict[rule]['positive_example'].add(text)
        
        # Add negative examples (1, 2)
        for i in range(1, 3):
            col_name = f'negative_example_{i}'
            if col_name in test_df.columns:
                text = str(row[col_name]).strip() if pd.notna(row[col_name]) else ""
                if text and len(text) > 0:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Convert defaultdict to regular dict (sets are already deduplicated)
    result_dict = dict(dataset_dict)
    
    return result_dict


def build_inference_dataset(data_path_test: str) -> dict:
    """
    Build dataset for inference that includes only non-examples from test.csv
    (body column with row_id for tracking).
    
    Returns:
        Dictionary with row_id as key and (rule, body) as value
    """
    test_df = pd.read_csv(data_path_test)
    
    inference_data = {}
    for _, row in tqdm(test_df.iterrows(), total=test_df.shape[0], desc="Processing test.csv for inference"):
        row_id = row['row_id']
        rule = row['rule']
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        
        if body and len(body) > 0:
            inference_data[row_id] = {
                'rule': rule,
                'body': body
            }
    
    return inference_data


def evaluate(model, val_loader, device):
    """Evaluate model on validation set"""
    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []
    
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Evaluating", leave=False):
            batch = {k: v.to(device) for k, v in batch.items()}
            labels = batch.pop('labels')
            
            outputs = model(**batch)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=-1)[:, 1]
            preds = torch.argmax(logits, dim=-1)
            
            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)
    all_labels = np.array(all_labels)
    
    metrics = {
        'auc': roc_auc_score(all_labels, all_probs),
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, zero_division=0),
        'recall': recall_score(all_labels, all_preds, zero_division=0),
        'f1': f1_score(all_labels, all_preds, zero_division=0),
    }
    
    return metrics


def generate_predictions(model, inference_loader, device):
    """Generate predictions for inference data"""
    model.eval()
    all_probs = []
    
    with torch.no_grad():
        for batch in tqdm(inference_loader, desc="Generating predictions", leave=False):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            outputs = model(**batch)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=-1)[:, 1]
            
            all_probs.extend(probs.cpu().numpy())
    
    return np.array(all_probs)


def train():
    """Main training function with manual optimizer and scheduler"""
    cfg = Config()
    set_seed(cfg.seed)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    print(f"Training started at {datetime.now()}")
    print(f"Model: {cfg.model_name_or_path}")
    print(f"Epochs: {cfg.num_train_epochs}, Batch size: {cfg.per_device_train_batch_size}, LR: {cfg.learning_rate}\n")
    
    # Load data
    print("Loading datasets...")
    train_data_dict = build_training_dataset(
        data_path_train=f"{cfg.data_path}/train.csv",
        data_path_test=f"{cfg.data_path}/test.csv"
    )
    
    # Print statistics about rules and examples
    print("\nDataset Statistics:")
    print(f"Number of unique rules: {len(train_data_dict)}")
    total_examples = sum(
        len(cats['positive_example']) + len(cats['positive_nonexample']) +
        len(cats['negative_example']) + len(cats['negative_nonexample'])
        for cats in train_data_dict.values()
    )
    print(f"Total examples across all rules: {total_examples}")
    
    # FIXED: Suppress tokenizer conversion warnings
    import warnings
    warnings.filterwarnings("ignore", category=UserWarning, message=".*sentencepiece tokenizer.*")
    
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_name_or_path, use_fast=False)
    
    train_dataset = JigsawBertDataset(train_data_dict, tokenizer, max_length=cfg.max_length)
    
    # Split into train and validation sets
    train_size = len(train_dataset)
    print(f"Train dataset: {len(train_dataset)} samples")
    
    # Create data loaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.per_device_train_batch_size,
        shuffle=True
    )
    
    # Initialize model
    model = AutoModelForSequenceClassification.from_pretrained(
        cfg.model_name_or_path, 
        num_labels=2
    ).to(device)
    
    # Setup optimizer and scheduler
    optimizer = AdamW(
        model.parameters(), 
        lr=cfg.learning_rate, 
        weight_decay=cfg.weight_decay
    )
    
    total_steps = len(train_loader) * cfg.num_train_epochs
    warmup_steps = int(total_steps * cfg.warmup_ratio)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, 
        warmup_steps, 
        total_steps
    )
    
    print(f"Total steps: {total_steps}")
    print(f"Warmup steps: {warmup_steps}\n")
    
    # Load inference data for predictions at each epoch
    print("Loading inference dataset...")
    inference_data = build_inference_dataset(f"{cfg.data_path}/test.csv")
    inference_dataset = InferenceDataset(
        inference_data, 
        tokenizer, 
        max_length=cfg.max_length
    )
    
    inference_loader = torch.utils.data.DataLoader(
        inference_dataset,
        batch_size=cfg.per_device_eval_batch_size,
        shuffle=False
    )
    
    print(f"Inference dataset: {len(inference_dataset)} samples\n")
    
    # Training loop
    print("="*60)
    print("Starting training...")
    print("="*60 + "\n")
    
    for epoch in range(cfg.num_train_epochs):
        print(f"\nEpoch {epoch + 1}/{cfg.num_train_epochs}")
        print("-" * 60)
        
        # Training phase
        model.train()
        total_loss = 0
        
        for step, batch in enumerate(tqdm(train_loader, desc="Training", leave=False)):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            outputs = model(**batch)
            loss = outputs.loss
            
            total_loss += loss.item()
            
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            
            if (step + 1) % cfg.logging_steps == 0:
                avg_loss = total_loss / (step + 1)
                print(f"Step {step + 1}, Avg Loss: {avg_loss:.4f}")
        
        avg_epoch_loss = total_loss / len(train_loader)
        print(f"Average Training Loss: {avg_epoch_loss:.4f}")
        
        # Generate predictions on inference set
        print("\nGenerating predictions on inference set...")
        probs = generate_predictions(model, inference_loader, device)
        
        # Create submission DataFrame
        submission_df = pd.DataFrame({
            'row_id': inference_dataset.row_ids,
            'rule_violation': probs
        })
        
        print(f"\nSubmission results for Epoch {epoch + 1}:")
        print(f"  Shape: {submission_df.shape}")
        print(f"  Mean probability: {probs.mean():.4f}")
        print(f"  Min probability: {probs.min():.4f}")
        print(f"  Max probability: {probs.max():.4f}")
        print("\nFirst few rows:")
        print(submission_df.head())
        
    # Save submission for this epoch
    submission_df.to_csv('submission2.csv', index=False)
    print("\n" + "="*60)
    print("COMPLETED")
    print("="*60)


train()

Writing train_on_gpu_0_bert.py


In [3]:
%%writefile train_on_gpu_1_qwen3.py

import os
import pandas as pd
import torch
import datetime
from collections import defaultdict
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
from transformers.utils import is_torch_bf16_gpu_available
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from trl import DataCollatorForCompletionOnlyLM
from tqdm import tqdm
import torch.nn.functional as F
import random

# ============================================================================
# Configuration
# ============================================================================
MODEL_NAME = "/kaggle/input/qwen-3/transformers/1.7b-gptq-int8/1"
DATA_PATH_TRAIN = "/kaggle/input/jigsaw-agile-community-rules/train.csv"
DATA_PATH_TEST = "/kaggle/input/jigsaw-agile-community-rules/test.csv"

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
MODEL_OUTPUT_PATH = f"./lora_checkpoint_{timestamp}"

# Training hyperparameters
EPOCH = 2
LR = 1e-4
TRAIN_BS = 1
GRAD_ACC_NUM = 8
SEED = 42

# Dataset parameters
NUM_POS_EXAMPLES = 2
NUM_NEG_EXAMPLES = 2

# Inference parameters
BATCH_SIZE = 16

# ============================================================================
# Dataset Building
# ============================================================================
def build_training_dataset_dict(data_path_train: str, data_path_test: str, seed: int = 42):
    """
    Build training dataset from:
    1. Examples from test.csv (positive_example_1, positive_example_2, negative_example_1, negative_example_2)
    2. Examples AND nonexamples from train.csv
    
    Returns dict with structure:
    {
        rule: {
            'positive_example': list of texts that violate the rule,
            'negative_example': list of texts that don't violate the rule
        }
    }
    """
    random.seed(seed)
    
    train_df = pd.read_csv(data_path_train)
    test_df = pd.read_csv(data_path_test)
    
    dataset_dict = defaultdict(lambda: {
        'positive_example': set(),
        'negative_example': set()
    })
    
    # Process train.csv - both examples and nonexamples
    print("Processing train.csv...")
    for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
        rule = row['rule']
        
        # Add body with its rule_violation label
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        if body:
            if row['rule_violation'] == 1:
                dataset_dict[rule]['positive_example'].add(body)
            else:
                dataset_dict[rule]['negative_example'].add(body)
        
        # Add explicit examples
        for i in range(1, 3):
            pos_col = f'positive_example_{i}'
            if pos_col in train_df.columns:
                text = str(row[pos_col]).strip() if pd.notna(row[pos_col]) else ""
                if text:
                    dataset_dict[rule]['positive_example'].add(text)
            
            neg_col = f'negative_example_{i}'
            if neg_col in train_df.columns:
                text = str(row[neg_col]).strip() if pd.notna(row[neg_col]) else ""
                if text:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Process test.csv - only explicit examples
    print("Processing test.csv examples...")
    for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
        rule = row['rule']
        
        for i in range(1, 3):
            pos_col = f'positive_example_{i}'
            if pos_col in test_df.columns:
                text = str(row[pos_col]).strip() if pd.notna(row[pos_col]) else ""
                if text:
                    dataset_dict[rule]['positive_example'].add(text)
            
            neg_col = f'negative_example_{i}'
            if neg_col in test_df.columns:
                text = str(row[neg_col]).strip() if pd.notna(row[neg_col]) else ""
                if text:
                    dataset_dict[rule]['negative_example'].add(text)
    
    # Convert sets to lists
    result_dict = {}
    for rule, value in dataset_dict.items():
        result_dict[rule] = {
            'positive_example': list(value['positive_example']),
            'negative_example': list(value['negative_example'])
        }
    
    return result_dict


def build_inference_data(data_path_test: str):
    """
    Build inference data from test.csv body column (nonexamples)
    
    Returns list of dicts with:
    [
        {
            'row_id': row_id,
            'rule': rule text,
            'text': body text
        }
    ]
    """
    test_df = pd.read_csv(data_path_test)
    
    inference_data = []
    for _, row in test_df.iterrows():
        body = str(row['body']).strip() if pd.notna(row['body']) else ""
        if body:
            inference_data.append({
                'row_id': row['row_id'],
                'rule': row['rule'],
                'text': body
            })
    
    return inference_data


# ============================================================================
# Training Dataset
# ============================================================================
class TrainingDataset(Dataset):
    """
    Training dataset that creates prompts with examples and labels.
    """
    
    def __init__(
        self, 
        dataset_dict,
        tokenizer,
        num_pos_examples: int = 2,
        num_neg_examples: int = 2,
        sys_prompt: str = "You are a content moderator for Reddit. Your task is to determine if comments violate community rules. Use the examples as guidance. Answer only 'Yes' if it violates the rule, or 'No' if it doesn't.",
        seed: int = None
    ):
        self.dataset_dict = dataset_dict
        self.tokenizer = tokenizer
        self.num_pos_examples = num_pos_examples
        self.num_neg_examples = num_neg_examples
        self.sys_prompt = sys_prompt
        
        if seed is not None:
            random.seed(seed)
        
        # Create training examples
        self.examples = []
        
        for rule, categories in dataset_dict.items():
            pos_examples = categories['positive_example']
            neg_examples = categories['negative_example']
            
            # Add positive examples as training targets
            for text in pos_examples:
                self.examples.append({
                    'rule': rule,
                    'text': text,
                    'label': 1,
                    'pos_pool': [p for p in pos_examples if p != text],
                    'neg_pool': neg_examples
                })
            
            # Add negative examples as training targets
            for text in neg_examples:
                self.examples.append({
                    'rule': rule,
                    'text': text,
                    'label': 0,
                    'pos_pool': pos_examples,
                    'neg_pool': [n for n in neg_examples if n != text]
                })
    
    def __len__(self):
        return len(self.examples)
    
    def _build_prompt_text(self, example, sampled_pos_examples, sampled_neg_examples):
        """Build prompt with rule, examples, and text to classify"""
        prompt_parts = [
            self.sys_prompt,
            "",
            f"Rule: \"{example['rule']}\"",
            ""
        ]
        
        if sampled_pos_examples:
            prompt_parts.append("Here are examples of comments that VIOLATE this rule:")
            for i, pos_ex in enumerate(sampled_pos_examples, 1):
                prompt_parts.append(f"{i}. {pos_ex}")
            prompt_parts.append("")
        
        if sampled_neg_examples:
            prompt_parts.append("Here are examples of comments that DO NOT violate this rule:")
            for i, neg_ex in enumerate(sampled_neg_examples, 1):
                prompt_parts.append(f"{i}. {neg_ex}")
            prompt_parts.append("")
        
        prompt_parts.extend([
            "Now, evaluate this comment:",
            f"\"{example['text']}\"",
            "",
            "Does it violate the rule?"
        ])
        
        return "\n".join(prompt_parts)
    
    def __getitem__(self, idx):
        example = self.examples[idx]
        
        # Sample examples for prompt context
        sampled_pos = random.sample(example['pos_pool'], 
                                   min(self.num_pos_examples, len(example['pos_pool']))) if example['pos_pool'] else []
        sampled_neg = random.sample(example['neg_pool'], 
                                   min(self.num_neg_examples, len(example['neg_pool']))) if example['neg_pool'] else []
        
        user_content = self._build_prompt_text(example, sampled_pos, sampled_neg)
        
        messages = [{"role": "user", "content": user_content}]
        
        text_with_template = self.tokenizer.apply_chat_template(
            messages, 
            add_generation_prompt=True,
            tokenize=False
        )
        
        # Add label
        label = "Yes" if example['label'] == 1 else "No"
        full_text = text_with_template + label + self.tokenizer.eos_token
        
        # Tokenize the full text
        tokenized = self.tokenizer(
            full_text,
            truncation=True,
            max_length=2048,
            return_tensors=None,  # Return lists instead of tensors
        )
        
        return tokenized


class ClassifyDataset(Dataset):
    """Wrapper dataset for classification"""
    def __init__(self, base_dataset):
        self.base_dataset = base_dataset
    
    def __len__(self):
        return len(self.base_dataset)
    
    def __getitem__(self, idx):
        return self.base_dataset[idx]


# ============================================================================
# Inference Functions
# ============================================================================
def get_yes_no_probabilities(model, tokenizer, prompts):
    """
    Get probabilities for Yes/No tokens
    """
    yes_token_id = tokenizer.encode("Yes", add_special_tokens=False)[0]
    no_token_id = tokenizer.encode("No", add_special_tokens=False)[0]
    
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=2048).to(model.device)
    
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    
    last_token_logits = logits[:, -1, :]
    
    yes_logits = last_token_logits[:, yes_token_id]
    no_logits = last_token_logits[:, no_token_id]
    
    yes_no_logits = torch.stack([no_logits, yes_logits], dim=1)
    probabilities = F.softmax(yes_no_logits, dim=1)
    
    yes_probs = probabilities[:, 1].cpu().numpy()
    return yes_probs


def build_inference_prompt(rule, text, pos_examples, neg_examples, sys_prompt, tokenizer, num_pos=2, num_neg=2):
    """Build prompt for inference"""
    # Sample examples
    sampled_pos = random.sample(pos_examples, min(num_pos, len(pos_examples))) if pos_examples else []
    sampled_neg = random.sample(neg_examples, min(num_neg, len(neg_examples))) if neg_examples else []
    
    prompt_parts = [
        sys_prompt,
        "",
        f"Rule: \"{rule}\"",
        ""
    ]
    
    if sampled_pos:
        prompt_parts.append("Here are examples of comments that VIOLATE this rule:")
        for i, pos_ex in enumerate(sampled_pos, 1):
            prompt_parts.append(f"{i}. {pos_ex}")
        prompt_parts.append("")
    
    if sampled_neg:
        prompt_parts.append("Here are examples of comments that DO NOT violate this rule:")
        for i, neg_ex in enumerate(sampled_neg, 1):
            prompt_parts.append(f"{i}. {neg_ex}")
        prompt_parts.append("")
    
    prompt_parts.extend([
        "Now, evaluate this comment:",
        f"\"{text}\"",
        "",
        "Does it violate the rule?"
    ])
    
    user_content = "\n".join(prompt_parts)
    messages = [{"role": "user", "content": user_content}]
    
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
    
    return prompt


# ============================================================================
# Main Script
# ============================================================================

print("=" * 80)
print("Qwen-3 4B AWQ Training and Inference Pipeline")
print("=" * 80)

# ------------------------------------------------------------------------
# Step 1: Build dataset
# ------------------------------------------------------------------------
print("\n[Step 1/5] Building training dataset...")
dataset_dict = build_training_dataset_dict(DATA_PATH_TRAIN, DATA_PATH_TEST, seed=SEED)
print(f"Total rules: {len(dataset_dict)}")

total_pos = sum(len(v['positive_example']) for v in dataset_dict.values())
total_neg = sum(len(v['negative_example']) for v in dataset_dict.values())
print(f"Total positive examples: {total_pos}")
print(f"Total negative examples: {total_neg}")

# ------------------------------------------------------------------------
# Step 2: Initialize tokenizer
# ------------------------------------------------------------------------
print("\n[Step 2/5] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.padding_side = "left"
print(f"Tokenizer loaded: {MODEL_NAME}")

# ------------------------------------------------------------------------
# Step 3: Create training dataset
# ------------------------------------------------------------------------
print("\n[Step 3/5] Creating training dataset...")
train_base_dataset = TrainingDataset(
    dataset_dict,
    tokenizer=tokenizer,
    num_pos_examples=NUM_POS_EXAMPLES,
    num_neg_examples=NUM_NEG_EXAMPLES,
    seed=SEED
)
train_dataset = ClassifyDataset(train_base_dataset)
print(f"Training dataset size: {len(train_dataset)}")

data_collator = DataCollatorForCompletionOnlyLM("Does it violate the rule?", tokenizer=tokenizer)

# ------------------------------------------------------------------------
# Step 4: Train model
# ------------------------------------------------------------------------
print("\n[Step 4/5] Training model...")
os.makedirs(MODEL_OUTPUT_PATH, exist_ok=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
print(f"Model loaded: {MODEL_NAME}")

# Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
    bias='none',
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

training_args = TrainingArguments(
    output_dir=MODEL_OUTPUT_PATH,
    logging_steps=1,
    logging_strategy="steps",
    eval_strategy="no",
    save_strategy="epoch",
    num_train_epochs=EPOCH,
    optim="paged_adamw_8bit",
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    learning_rate=LR,
    weight_decay=0.01,
    bf16=is_torch_bf16_gpu_available(),
    fp16=not is_torch_bf16_gpu_available(),
    per_device_train_batch_size=TRAIN_BS,
    gradient_accumulation_steps=GRAD_ACC_NUM,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    group_by_length=False,
    report_to="none",
    seed=SEED,
    remove_unused_columns=False,
)

trainer = Trainer(
    model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

print("\nStarting training...")
trainer_output = trainer.train()

print("\nTraining complete!")
print(f"Final training loss: {trainer_output.training_loss:.4f}")
print(f"Saving model to: {MODEL_OUTPUT_PATH}")
trainer.save_model(MODEL_OUTPUT_PATH)

# ------------------------------------------------------------------------
# Step 5: Inference
# ------------------------------------------------------------------------
print("\n[Step 5/5] Running inference on test.csv...")

# Load inference data
inference_data = build_inference_data(DATA_PATH_TEST)
print(f"Total inference samples: {len(inference_data)}")

# Load trained model
print("Loading trained model...")
device = torch.device("cuda:0")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
).to(device)

trained_model = PeftModel.from_pretrained(base_model, MODEL_OUTPUT_PATH)
trained_model.eval()

# Prepare prompts
print("Preparing prompts...")
sys_prompt = "You are a content moderator for Reddit. Your task is to determine if comments violate community rules. Use the examples as guidance. Answer only 'Yes' if it violates the rule, or 'No' if it doesn't."

all_prompts = []
all_row_ids = []

for item in tqdm(inference_data):
    rule = item['rule']
    text = item['text']
    row_id = item['row_id']
    
    # Get examples for this rule
    pos_examples = dataset_dict.get(rule, {}).get('positive_example', [])
    neg_examples = dataset_dict.get(rule, {}).get('negative_example', [])
    
    prompt = build_inference_prompt(
        rule, text, pos_examples, neg_examples, 
        sys_prompt, tokenizer, NUM_POS_EXAMPLES, NUM_NEG_EXAMPLES
    )
    
    all_prompts.append(prompt)
    all_row_ids.append(row_id)

# Run inference in batches
print("Running inference...")
all_probabilities = []

with torch.no_grad():
    for i in tqdm(range(0, len(all_prompts), BATCH_SIZE)):
        batch_prompts = all_prompts[i:i + BATCH_SIZE]
        batch_probs = get_yes_no_probabilities(trained_model, tokenizer, batch_prompts)
        all_probabilities.extend(batch_probs)

# Create submission
submission_df = pd.DataFrame({
    'row_id': all_row_ids,
    'rule_violation': all_probabilities
})

submission_df.to_csv('submission3.csv', index=False)
print("\n" + "=" * 80)
print("Pipeline complete!")
print("=" * 80)
print(f"Submission saved to: submission3.csv")
print(f"Total predictions: {len(submission_df)}")
print(f"\nSample predictions:")
print(submission_df.head(10))


Writing train_on_gpu_1_qwen3.py


In [4]:
import subprocess
import os
import torch
import gc
import pandas as pd

# Check if test.csv has less than 20 rows
test_df = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/test.csv')  # Adjust path if needed

if len(test_df) < 20:
    print(f"Test dataset has only {len(test_df)} rows (less than 20)")
    print("Generating dummy submission.csv with all 0.5 probabilities...")
    
    # Create dummy submission with 0.5 for all rows
    dummy_submission = pd.DataFrame({
        'row_id': test_df['row_id'],
        'rule_violation': [0.5] * len(test_df)
    })
    
    dummy_submission.to_csv('submission.csv', index=False)
    print("Dummy submission.csv created successfully!")
    print(dummy_submission.head())
    
else:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    p1 = subprocess.Popen(['python', 'train_on_gpu_0_gemma3.py'])
    
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    p2 = subprocess.Popen(['python', 'train_on_gpu_1_qwen3.py'])
    
    p1.wait()
    torch.cuda.empty_cache()
    gc.collect()
    
    print("Gemma3 training completed on GPU 0!")
    
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    p3 = subprocess.Popen(['python', 'train_on_gpu_0_bert.py'])
    
    p3.wait()
    print("BERT training completed on GPU 0!")
    
    # Wait for Qwen3 to finally complete
    p2.wait()
    print("Qwen3 training completed on GPU 1!")
    
    torch.cuda.empty_cache()
    gc.collect()
    print("\nAll scripts completed successfully!")

Test dataset has only 10 rows (less than 20)
Generating dummy submission.csv with all 0.5 probabilities...
Dummy submission.csv created successfully!
   row_id  rule_violation
0    2029             0.5
1    2030             0.5
2    2031             0.5
3    2032             0.5
4    2033             0.5


In [5]:
import pandas as pd
import numpy as np

# Load submission files (fixed typo: 'submission')
submission_df_gemma3 = pd.read_csv('submission1.csv')
submission_df_bert = pd.read_csv('submission2.csv')
submission_df_qwen3 = pd.read_csv('submission3.csv')

# Calculate fractional ranks
r_gemma3 = submission_df_gemma3['rule_violation'].rank(method='average') / (len(submission_df_gemma3) + 1)
r_bert = submission_df_bert['rule_violation'].rank(method='average') / (len(submission_df_bert) + 1)
r_qwen3 = submission_df_qwen3['rule_violation'].rank(method='average') / (len(submission_df_qwen3) + 1)

# Combine the ranks (fixed logic: used r_gemma3 instead of r_qwen3 twice)
blend = 0.4 * r_qwen3 + 0.3 * r_bert + 0.3 * r_gemma3

# Create the final submission DataFrame (fixed: defined 'q' before use)
# This copies the structure (e.g., ID columns) from one of the submissions
q = submission_df_gemma3.copy()
q['rule_violation'] = blend

# Save the final submission
q.to_csv('submission.csv', index=False)
print(q.head(10))

FileNotFoundError: [Errno 2] No such file or directory: 'submission1.csv'