In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import cv2
import numpy as np
import pickle
from torchvision.transforms import transforms
from PIL import Image
from torch.amp import autocast, GradScaler
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from torchvision.models import densenet121
from read_data import ChestXrayDataSet
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from torch.optim.lr_scheduler import ReduceLROnPlateau

import numpy as np

# ===================== CONSTANTS =====================
DATA_DIR = "C:/Users/zafer/OneDrive/Masaüstü/224NIH/dataset/images-224/images-224"
TRAIN_IMAGE_LIST = 'labels/train_list_balanced.txt'
VAL_IMAGE_LIST = 'labels/val_list_balanced.txt'
CKPT_PATH = ''  # Pretrained model path
SAVE_PATH = 'best_model/5finding_trained_densenet121.pth.tar'  # Path to save the trained model
N_CLASSES = 5
#CLASS_NAMES = [
#    'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
#    'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
#    'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia'
#]
CLASS_NAMES = ["Emphysema", "Cardiomegaly", "Edema", "Effusion", "Atelectasis"]

BATCH_SIZE = 16
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
NUM_WORKERS = 0


# ===================== CUSTOM CLAHE TRANSFORM =====================
class CLAHETransform:
    """Apply CLAHE to an image."""
    def __call__(self, img):
        img = np.array(img, dtype=np.uint8)  # Convert PIL image to numpy array with explicit dtype
        if len(img.shape) == 3:  # For RGB images
            lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
            l, a, b = cv2.split(lab)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            l = clahe.apply(l)
            lab = cv2.merge((l, a, b))
            img = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
        else:  # For grayscale images
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img)
        return Image.fromarray(img)  # Convert back to PIL image

# ===================== UPDATED TRANSFORMS =====================
def create_train_transforms():
    """Create transformations for training with enhanced augmentation."""
    return transforms.Compose([
        transforms.Resize(256),
        CLAHETransform(),  # Apply CLAHE
        AutoAugment(AutoAugmentPolicy.IMAGENET),  # Advanced data augmentation
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

def create_val_transforms():
    """Create transformations for validation with CLAHE."""
    return transforms.Compose([
        transforms.Resize(256),
        CLAHETransform(),  # Apply CLAHE
        transforms.TenCrop(224),
        transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
        transforms.Lambda(lambda crops: torch.stack([transforms.Normalize([0.485, 0.456, 0.406], 
                                                                          [0.229, 0.224, 0.225])(crop) for crop in crops]))
    ])

# ===================== DATA LOADERS =====================
def get_dataloaders():
    train_dataset = ChestXrayDataSet(data_dir=DATA_DIR, image_list_file=TRAIN_IMAGE_LIST,
                                     transform=create_train_transforms())
    val_dataset = ChestXrayDataSet(data_dir=DATA_DIR, image_list_file=VAL_IMAGE_LIST,
                                   transform=create_val_transforms())

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=NUM_WORKERS, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                            num_workers=NUM_WORKERS, pin_memory=True)
    return train_loader, val_loader


# ===================== MODEL DEFINITION =====================
class DenseNet121(nn.Module):
    """DenseNet121 with a sigmoid activation for multi-label classification."""
    def __init__(self, out_size):
        super(DenseNet121, self).__init__()
        self.densenet121 = densenet121(pretrained=True)
        num_ftrs = self.densenet121.classifier.in_features
        self.densenet121.classifier = nn.Linear(num_ftrs, out_size)

    def forward(self, x):
        return self.densenet121(x)


def load_pretrained_model():
    """Load a pretrained DenseNet121 model."""
    model = DenseNet121(N_CLASSES)
    if os.path.isfile(CKPT_PATH):
        checkpoint = torch.load(CKPT_PATH, map_location="cuda:0")
        if 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
            state_dict = {
                k.replace('module.', '')
                 .replace('norm.1', 'norm1')
                 .replace('conv.1', 'conv1')
                 .replace('norm.2', 'norm2')
                 .replace('conv.2', 'conv2')
                 .replace("classifier.0","classifier"): v
                for k, v in state_dict.items()
            }
            model.load_state_dict(state_dict)
        else:
            model.load_state_dict(checkpoint)
        print("Loaded pretrained model.")
    return model

from sklearn.metrics import roc_curve

def compute_optimal_thresholds(gt, pred):
    """
    Compute optimal thresholds for each class based on validation set predictions.
    Returns a list of thresholds for all classes.
    """
    thresholds = []
    for i in range(N_CLASSES):
        fpr, tpr, thresh = roc_curve(gt[:, i], pred[:, i])
        j_statistic = tpr - fpr
        optimal_idx = np.argmax(j_statistic)
        optimal_threshold = thresh[optimal_idx]
        thresholds.append(optimal_threshold)
    return thresholds

# ===================== TRAINING LOOP =====================
from torch.optim.lr_scheduler import ReduceLROnPlateau

def train_model(model, train_loader, val_loader, num_epochs, learning_rate):
    model = model.cuda()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scaler = GradScaler()

    # Initialize scheduler
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)

    best_auroc = 0.0  # Initialize best_auroc
    best_thresholds = None  # To store best thresholds

    for epoch in range(num_epochs):
        print(f"Epoch [{epoch + 1}/{num_epochs}]")
        
        # Training Phase
        model.train()
        train_loss = 0.0
        for inputs, targets in tqdm(train_loader, desc="Training"):
            inputs, targets = inputs.cuda(), targets.cuda()

            optimizer.zero_grad()
            with autocast(device_type='cuda'):
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Training Loss: {train_loss:.4f}")

        # Validation Phase
        print("Evaluating on validation set...")
        val_auroc, gt, pred = evaluate_model(model, val_loader)
        print(f"Validation AUROC: {val_auroc:.4f}")

        # Adjust learning rate based on validation performance
        scheduler.step(val_auroc)

        # Save Best Model and Thresholds
        if val_auroc > best_auroc:
            best_auroc = val_auroc
            best_thresholds = compute_optimal_thresholds(gt, pred)
            os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
            torch.save(model.state_dict(), SAVE_PATH)
            print(f"Saved Best Model with AUROC: {best_auroc:.4f}")

    print("Training Complete!")
    print(f"Best Thresholds: {best_thresholds}")
    return best_thresholds



# ===================== EVALUATE MODEL =====================
def evaluate_model(model, val_loader):
    """Evaluate the model and compute AUROC."""
    model.eval()  # Set the model to evaluation mode
    gt, pred = [], []

    with torch.no_grad():
        for inp, target in tqdm(val_loader, desc="Evaluating", leave=True):
            target = target.cuda()
            bs, n_crops, c, h, w = inp.size()
            inp = inp.view(-1, c, h, w).cuda()

            # Get raw logits and apply sigmoid
            logits = model(inp)
            output_mean = torch.sigmoid(logits.view(bs, n_crops, -1).mean(1))

            gt.append(target.cpu())
            pred.append(output_mean.cpu())

    gt = torch.cat(gt).numpy()
    pred = torch.cat(pred).numpy()
    AUROCs = [roc_auc_score(gt[:, i], pred[:, i]) for i in range(N_CLASSES)]
    return np.mean(AUROCs), gt, pred

# ===================== MAIN SCRIPT =====================
def main():
    train_loader, val_loader = get_dataloaders()
    model = load_pretrained_model()
    train_model(model, train_loader, val_loader, NUM_EPOCHS, LEARNING_RATE)


if __name__ == "__main__":
    main()





Epoch [1/10]


Training: 100%|██████████| 63/63 [00:16<00:00,  3.79it/s]


Training Loss: 0.3707
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:32<00:00,  1.95it/s]


Validation AUROC: 0.6302
Saved Best Model with AUROC: 0.6302
Epoch [2/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.52it/s]


Training Loss: 0.3391
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:32<00:00,  1.96it/s]


Validation AUROC: 0.5294
Epoch [3/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.51it/s]


Training Loss: 0.3425
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  2.00it/s]


Validation AUROC: 0.5842
Epoch [4/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.56it/s]


Training Loss: 0.3429
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  1.99it/s]


Validation AUROC: 0.5657
Epoch [5/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.56it/s]


Training Loss: 0.3350
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  1.98it/s]


Validation AUROC: 0.6657
Saved Best Model with AUROC: 0.6657
Epoch [6/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.51it/s]


Training Loss: 0.3285
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Validation AUROC: 0.6536
Epoch [7/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.61it/s]


Training Loss: 0.3318
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  1.98it/s]


Validation AUROC: 0.6261
Epoch [8/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.55it/s]


Training Loss: 0.3287
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Validation AUROC: 0.6724
Saved Best Model with AUROC: 0.6724
Epoch [9/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.52it/s]


Training Loss: 0.3285
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


Validation AUROC: 0.6687
Epoch [10/10]


Training: 100%|██████████| 63/63 [00:13<00:00,  4.61it/s]


Training Loss: 0.3258
Evaluating on validation set...


Evaluating: 100%|██████████| 63/63 [00:31<00:00,  1.99it/s]

Validation AUROC: 0.6557
Training Complete!
Best Thresholds: [np.float32(0.05478248), np.float32(0.040272005), np.float32(0.061466575), np.float32(0.20306452), np.float32(0.21073325)]





In [3]:
def classify_with_thresholds(pred, thresholds):
    """Apply class-specific thresholds to predictions."""
    return (pred > thresholds).astype(int)


In [4]:
best_thresholds = train_model(model, train_loader, val_loader, NUM_EPOCHS, LEARNING_RATE)
# For predictions:
final_predictions = classify_with_thresholds(pred, best_thresholds)


NameError: name 'model' is not defined