In [None]:
!pip install -U \
  torch \
  transformers \
  datasets \
  accelerate \
  evaluate \
  scikit-learn \
  numpy \
  pandas


In [None]:
# ==== Setup (clean version)
!pip -q install transformers datasets accelerate evaluate scikit-learn matplotlib

import os

import evaluate
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

# ==== Disable W&B and other external loggers
# NOTE(review): these variables are set after the Hugging Face imports above;
# a library that reads its variable at import time may not see it — confirm.
os.environ.update({
    "WANDB_DISABLED": "true",         # switch off Weights & Biases
    "HF_HUB_DISABLE_TELEMETRY": "1",  # optional, intended for cleaner logs
})

# ==== Paths (chunked for BERT)
# One CSV per split, in train / validation / test order.
TRAIN, VAL, TEST = (
    "./bluescrubs_train_chunked_bert.csv",
    "./bluescrubs_val_chunked_bert.csv",
    "./bluescrubs_test_chunked_bert.csv",
)

# ==== Load data and convert to Hugging Face Datasets
def to_hfds(path):
    """Load one CSV split and convert it to a Hugging Face ``Dataset``.

    The CSV must contain a ``text`` column and a ``label`` column. ``label`` is
    cast to ``int`` and exposed as ``labels``, the column name the Trainer
    expects for targets.

    Rows where ``text`` or ``label`` is missing are dropped (and reported), so
    the failure does not surface later as an integer-cast error or a tokenizer
    type error far from the cause.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``Dataset`` with exactly two columns: ``text`` and ``labels``.

    Raises:
        KeyError: If the CSV lacks a ``text`` or ``label`` column.
    """
    df = pd.read_csv(path)

    missing_columns = {"text", "label"} - set(df.columns)
    if missing_columns:
        raise KeyError(f"{path}: missing required column(s): {sorted(missing_columns)}")

    usable = df.dropna(subset=["text", "label"])
    dropped = len(df) - len(usable)
    if dropped:
        print(f"[to_hfds] {path}: dropped {dropped} row(s) with missing text/label")

    # Rename for Trainer compatibility without mutating the frame that was read.
    usable = usable.assign(labels=usable["label"].astype(int))

    # preserve_index=False: after filtering, the index has gaps and would
    # otherwise be carried into the dataset as an extra column.
    return Dataset.from_pandas(usable[["text", "labels"]], preserve_index=False)

# Assemble the three splits (train, validation, test) into one DatasetDict.
ds = DatasetDict({
    split: to_hfds(csv_path)
    for split, csv_path in (("train", TRAIN), ("validation", VAL), ("test", TEST))
})

# ==== Tokenizer and Preprocessing
# The same checkpoint name is reused below when the model is loaded.
MODEL_NAME = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def preprocess(batch):
    """Tokenize a batch of examples for the model.

    Reads the ``text`` field of ``batch`` and runs the module-level
    ``tokenizer`` over it, truncating each sequence to at most 512 tokens.
    No padding is requested here.
    """
    texts = batch["text"]
    encoded = tokenizer(texts, truncation=True, max_length=512)
    return encoded

# Tokenize every split. Only the raw 'text' column is removed, so 'labels'
# stays alongside the tokenizer outputs.
tokenized = ds.map(function=preprocess, batched=True, remove_columns=["text"])

# ==== Model
# Sequence-classification model with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# ==== Metrics
# Load the four metric objects used by compute_metrics, in a fixed order.
accuracy, precision, recall, f1 = (
    evaluate.load(metric_name)
    for metric_name in ("accuracy", "precision", "recall", "f1")
)

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into accuracy, precision, recall and F1.

    Args:
        eval_pred: A pair ``(logits, labels)``. The predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1``; the last three are computed with ``average="binary"``.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    # Accuracy takes no averaging argument, so it is computed on its own.
    scores = {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
    }
    for key, metric in (("precision", precision), ("recall", recall), ("f1", f1)):
        outcome = metric.compute(predictions=preds, references=labels, average="binary")
        scores[key] = outcome[key]
    return scores

# ==== Training Arguments (No W&B)
# fp16 mixed precision needs a CUDA device; requesting it unconditionally makes
# argument construction fail on a CPU-only runtime. Enable it only when a GPU
# is actually present and fall back to full precision otherwise.
use_fp16 = torch.cuda.is_available()

# NOTE(review): `evaluation_strategy` is the spelling accepted by the
# transformers release pinned in this notebook; newer releases rename it to
# `eval_strategy` — confirm against the installed version before upgrading.
args = TrainingArguments(
    output_dir="./bioclinicalbert_bluescrubs_baseline",
    evaluation_strategy="epoch",  # evaluate on the validation split every epoch
    save_strategy="epoch",        # checkpoint on the same schedule as evaluation
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    fp16=use_fp16,
    logging_strategy="epoch",     # Log at each epoch
    logging_dir="./logs",         # Local logs only
    report_to=[],                 # no external reporters (W&B/TensorBoard/MLflow)
    load_best_model_at_end=True,  # optional: load best model by eval metric
)

# ==== Trainer
# Wire the model, arguments, tokenized splits and metric function together.
trainer = Trainer(
    args=args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
)

# ==== Train
trainer.train()

# ==== Evaluate on Test Set
# Result keys are prefixed with "test".
results = trainer.evaluate(tokenized["test"], metric_key_prefix="test")
print("\n===== Test Results =====")
for metric_name, metric_value in results.items():
    print(f"{metric_name}: {metric_value:.4f}")

# ==== Visualization inside Colab
logs = pd.DataFrame(trainer.state.log_history)
display(logs.tail())


def _plot_metric(ax, history, column, label):
    """Plot one logged column against epoch, using only rows where it was recorded.

    The log history mixes training, evaluation and summary entries, so a given
    column is NaN on the rows that came from other entries. Plotting the whole
    column would interleave NaNs and leave isolated markers with no connecting
    line; dropping those rows first gives a continuous curve.

    Args:
        ax: Matplotlib axes to draw on.
        history: DataFrame built from the trainer's log history.
        column: Name of the metric column to plot.
        label: Legend label for the curve.

    Returns:
        True if a curve was drawn, False if the column is absent or empty.
    """
    if "epoch" not in history.columns or column not in history.columns:
        return False
    points = history[["epoch", column]].dropna()
    if points.empty:
        return False
    ax.plot(points["epoch"], points[column], label=label, marker="o")
    return True


# Plot Loss
fig, ax = plt.subplots(figsize=(8, 5))
drew_loss = [
    _plot_metric(ax, logs, "loss", "Training Loss"),
    _plot_metric(ax, logs, "eval_loss", "Validation Loss"),
]
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training vs Validation Loss")
if any(drew_loss):  # avoid an empty-legend warning when nothing was logged
    ax.legend()
ax.grid(True)
plt.show()

# Plot Accuracy and F1 if available
if "eval_accuracy" in logs.columns and "eval_f1" in logs.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    drew_scores = [
        _plot_metric(ax, logs, "eval_accuracy", "Validation Accuracy"),
        _plot_metric(ax, logs, "eval_f1", "Validation F1"),
    ]
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Score")
    ax.set_title("Validation Accuracy and F1 Score per Epoch")
    if any(drew_scores):
        ax.legend()
    ax.grid(True)
    plt.show()


In [None]:
# Pin transformers to release 4.45.0.
# NOTE(review): in top-to-bottom order this cell runs only after the training
# cell above has already imported and used transformers — confirm whether it is
# meant to be executed first (presumably followed by a kernel restart).
!pip install -U transformers==4.45.0
