In [None]:
# =====================================
# Longformer Baseline (BlueScrubs) — FINAL STABLE A100 VERSION
# =====================================

!pip -q install transformers==4.46.3 datasets accelerate evaluate matplotlib torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# ==== Turn off external reporting (Weights & Biases, Hugging Face telemetry)
# Both variables are set before transformers is imported further down this cell.
import os, gc, torch
os.environ.update({
    "WANDB_DISABLED": "true",
    "HF_HUB_DISABLE_TELEMETRY": "1",
})

# ==== Free memory left over from earlier runs in this kernel
gc.collect()                # collect unreachable Python objects first
torch.cuda.empty_cache()    # then release PyTorch's cached CUDA memory

# ==== Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig
)
import evaluate

# get_device_name(0) raises when no CUDA device is available, so this print also
# acts as an early GPU check before the slower steps later in the cell.
print("âœ… Setup complete. GPU available:", torch.cuda.get_device_name(0))

# ==== Paths
# CSV files for the train / validation / test splits, relative to the
# notebook's working directory.
TRAIN, VAL, TEST = (
    "./bluescrubs_train_chunked_longformer.csv",
    "./bluescrubs_val_chunked_longformer.csv",
    "./bluescrubs_test_chunked_longformer.csv",
)

# ==== Convert CSVs to Hugging Face Datasets
def to_hfds(path):
    """Load one split CSV and wrap its `text`/`label` columns as a Dataset.

    Args:
        path: Location of a CSV file that has at least `text` and `label` columns.

    Returns:
        The result of `Dataset.from_pandas` on a frame holding only the `text`
        column and the `label` column cast to int.
    """
    frame = pd.read_csv(path).assign(label=lambda raw: raw["label"].astype(int))
    return Dataset.from_pandas(frame.loc[:, ["text", "label"]])

# One Dataset per split, keyed by the split names used in the rest of this cell.
ds = DatasetDict({
    split: to_hfds(csv_path)
    for split, csv_path in (("train", TRAIN), ("validation", VAL), ("test", TEST))
})

# ==== Tokenization (with labels preserved)
# Checkpoint name shared by the tokenizer, config, and model loaded in this cell.
MODEL_NAME = "allenai/longformer-base-4096"
# use_fast=True requests the fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def preprocess(batch):
    """Tokenize a batch of examples and copy its labels under the `labels` key.

    Args:
        batch: Mapping with a `text` entry (the texts to tokenize) and a
            `label` entry (the labels belonging to those texts).

    Returns:
        The tokenizer output for `batch["text"]`, truncated to at most 4096
        tokens, with `batch["label"]` stored under `labels`.
    """
    encoded = tokenizer(batch["text"], max_length=4096, truncation=True)
    encoded["labels"] = batch["label"]
    return encoded

print("ðŸ”„ Tokenizing datasets...")
# Tokenize every split in batches. Only the raw `text` column is dropped, so the
# original `label` column stays alongside the `labels` key added by preprocess.
tokenized = ds.map(preprocess, batched=True, remove_columns=["text"])
print("âœ… Tokenization complete. Columns now:", tokenized["train"].column_names)

# ==== Model
# Config for the pretrained checkpoint with a two-label classification setup.
config = AutoConfig.from_pretrained(MODEL_NAME, num_labels=2)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, config=config)
# NOTE(review): this prints the name of CUDA device 0, but nothing in this cell
# moves the model to a device explicitly - presumably Trainer handles placement.
print("âœ… Model loaded successfully on", torch.cuda.get_device_name(0))

# ==== Metrics
# Load the four `evaluate` metrics used by compute_metrics; each id is both the
# name passed to evaluate.load and the variable the metric is bound to.
accuracy, f1, precision, recall = (
    evaluate.load(metric_id) for metric_id in ("accuracy", "f1", "precision", "recall")
)

def compute_metrics(eval_pred):
    """Score predictions with accuracy, precision, recall, and F1.

    Args:
        eval_pred: A pair `(logits, labels)`. The predicted class of each
            example is the argmax of `logits` over the last axis.

    Returns:
        Dict with the keys `accuracy`, `precision`, `recall`, and `f1`; the
        last three are computed with `average="binary"`.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    scores = {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
    }
    # Precision, recall, and F1 share the same call shape, so iterate over them.
    for name, metric in (("precision", precision), ("recall", recall), ("f1", f1)):
        scores[name] = metric.compute(
            predictions=preds, references=labels, average="binary"
        )[name]
    return scores

# ==== Training Arguments (evaluation and checkpointing share one step schedule)
args = TrainingArguments(
    output_dir="./longformer_bluescrubs_baseline",

    # -- optimisation
    learning_rate=1e-5,
    weight_decay=0.01,
    num_train_epochs=2,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,      # 1 x 4 = 4 sequences per optimizer step per device
    fp16=True,

    # -- evaluation / checkpointing: same strategy and interval for both, which
    #    load_best_model_at_end relies on
    eval_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    save_total_limit=2,
    load_best_model_at_end=True,

    # -- data loading and logging
    dataloader_num_workers=0,
    logging_steps=100,
    report_to=[],                       # no reporting integrations (disables W&B)
)

# ==== Trainer
# Wires the model, training arguments, tokenized train/validation splits, and the
# metric function together. The test split is not passed here; it is evaluated
# separately after training.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# ==== Train (first run - no checkpoint yet)
print("ðŸš€ Starting training...")
trainer.train()  # clean start; previously called with resume_from_checkpoint=True

# ==== Evaluate on Test Set
print("\nðŸ§ª Evaluating on test set...")
# metric_key_prefix="test" requests "test"-prefixed result keys.
test_results = trainer.evaluate(tokenized["test"], metric_key_prefix="test")
print("\n===== TEST RESULTS =====")
# The .4f format assumes every returned value is numeric.
for k, v in test_results.items():
    print(f"{k}: {v:.4f}")

# ==== Plot Training & Validation Loss
def _logged_rows(history, column):
    """Return the rows of `history` in which `column` was logged.

    A run shorter than the logging or evaluation interval never logs `column`,
    so it is absent from the frame. In that case an empty frame with `step` and
    `column` is returned, so the plotting code below draws an empty curve
    instead of raising KeyError.
    """
    if column not in history.columns:
        return pd.DataFrame(columns=["step", column], dtype=float)
    return history[history[column].notna()]


# log_history itself is left untouched; it is written to CSV later in this cell.
log_history = pd.DataFrame(trainer.state.log_history)
train_loss = _logged_rows(log_history, "loss")
eval_metrics = _logged_rows(log_history, "eval_loss")

plt.figure(figsize=(8,5))
plt.plot(train_loss["step"], train_loss["loss"], label="Train Loss")
plt.plot(eval_metrics["step"], eval_metrics["eval_loss"], label="Validation Loss")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.legend()
plt.title("Longformer Baseline: Training vs Validation Loss")
plt.show()

# ==== Save results
# One-row CSV: the test metrics dict becomes a single record.
pd.DataFrame([test_results]).to_csv("longformer_results.csv", index=False)
# Full training log (the same frame the loss plot was built from).
log_history.to_csv("longformer_training_log.csv", index=False)
print("\nâœ… Saved test results to longformer_results.csv and training log to longformer_training_log.csv")


In [None]:
!pip uninstall -y transformers tokenizers simpletransformers
!pip install -U --force-reinstall transformers==4.46.3 datasets evaluate accelerate


In [None]:
import transformers, inspect
# Confirm which transformers version the kernel actually imported.
print("Transformers version:", transformers.__version__)

from transformers import TrainingArguments
# Source file the class was loaded from - shows which install is on the import path.
print("TrainingArguments file path:", inspect.getfile(TrainingArguments))


In [None]:
from transformers import TrainingArguments

# Smoke test: constructing TrainingArguments succeeds with the installed version.
# Uses `eval_strategy`, the same keyword as the main training configuration;
# `evaluation_strategy` is the deprecated spelling of this argument.
# NOTE: this rebinds the notebook-level name `args` used by the training cell.
args = TrainingArguments(
    output_dir="./test_dir",
    eval_strategy="epoch"
)

print("âœ… Works fine now!")


In [None]:
# Show the GPU model, driver/CUDA version, and current memory usage.
!nvidia-smi


In [None]:
!pip install -U transformers==4.46.3 datasets evaluate accelerate matplotlib


In [None]:
!pip install evaluate