In [1]:
import gc
import os
import cv2
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import itertools
from collections import Counter, defaultdict
from PIL import Image

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix 

import torch
import torch.nn as nn
from torchsummary import summary
from torch.cuda.amp import autocast
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler

# Import PyTorch Metric Learning
from pytorch_metric_learning import losses, miners, reducers, distances
from pytorch_metric_learning.utils import accuracy_calculator

import albumentations as A
from albumentations.pytorch import ToTensorV2 #np.array -> torch.tensor (B, 3, H, W)
import timm

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Report whether CUDA is usable. Device details are only queried when a GPU is
# present: torch.cuda.current_device() / get_device_name() raise on CPU-only
# machines, while the rest of the notebook already falls back to the CPU.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(0))

True
0
NVIDIA GeForce GTX 1050


In [3]:
# --- Dataset location -------------------------------------------------------
root_dir = '../datasets/'
csv_train_file = 'train_data_with_folds.csv'

# --- Label vocabulary: a class's index is its position in class_list --------
class_list = ['normal', 'preplus', 'plus']
num_classes = 3
label_dict = dict(zip(class_list, range(len(class_list))))

# --- Training table ---------------------------------------------------------
# Later cells read the 'path', 'label', 'fold' and 'id' columns of this frame.
df = pd.read_csv(os.path.join(root_dir, csv_train_file))

In [4]:
class TripletDataset(Dataset):
    """Image dataset driven by a DataFrame.

    Args:
        root_dir: directory that every entry of ``df['path']`` is joined onto.
        df: DataFrame with a ``'path'`` column and, optionally, a ``'label'`` column.
        mode: ``'test'`` makes ``__getitem__`` return only the image; any other
            value returns ``(image, label)``.
        transform: optional callable invoked as ``transform(image=array)`` whose
            result exposes the transformed image under the ``'image'`` key.
    """

    def __init__(self, root_dir, df, mode, transform=None):
        self.root = root_dir
        self.transform = transform
        self.mode = mode
        self.df = df
        self.img_path_list = df['path'].tolist()
        # Labels are optional so the same class can serve unlabeled test data.
        self.labels = df['label'].tolist() if 'label' in df.columns else None

    def __len__(self):
        return len(self.img_path_list)

    def _load_rgb(self, image_path):
        """Read ``image_path`` as an RGB numpy array (OpenCV first, PIL as fallback).

        Raises:
            RuntimeError: if neither OpenCV nor PIL can decode the file.
        """
        # OpenCV is tried first (faster). Only ordinary errors trigger the
        # fallback; KeyboardInterrupt / SystemExit are allowed to propagate.
        try:
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError("OpenCV couldn't read the image")
            # OpenCV decodes to BGR; convert to RGB.
            return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception:
            pass

        try:
            # The context manager closes the underlying file handle promptly.
            with Image.open(image_path) as pil_image:
                return np.array(pil_image.convert('RGB'))
        except Exception as e:
            raise RuntimeError(
                f"Failed to load image {image_path} with both OpenCV and PIL: {str(e)}"
            ) from e

    def __getitem__(self, idx):
        """Return the transformed image (``mode == 'test'``) or ``(image, label)``.

        Raises:
            ValueError: if a label is required but ``df`` had no ``'label'`` column.
            RuntimeError: if the image cannot be decoded.
        """
        needs_label = self.mode != 'test'
        if needs_label and self.labels is None:
            # Fail early with a clear message instead of a TypeError after a
            # wasted image decode.
            raise ValueError(
                f"mode={self.mode!r} requires a 'label' column in the DataFrame"
            )

        image = self._load_rgb(os.path.join(self.root, self.img_path_list[idx]))

        if self.transform is not None:
            image = self.transform(image=image)['image']

        if not needs_label:
            return image
        return image, torch.tensor(self.labels[idx]).long()

def get_transforms(image_size):
    """Build the albumentations pipelines used for training and validation.

    Args:
        image_size: side length; images are resized to ``image_size x image_size``.

    Returns:
        ``(transforms_train, transforms_val)``. Both resize, normalize and convert
        to a tensor; only the training pipeline adds random augmentations.
    """
    # Same per-channel statistics that the notebook's `unorm` helper inverts.
    norm_mean = (0.485, 0.456, 0.406)
    norm_std = (0.229, 0.224, 0.225)

    def _tail():
        # Fresh instances per pipeline: normalize, then HWC array -> CHW tensor.
        return [A.Normalize(mean=norm_mean, std=norm_std), ToTensorV2()]

    augmentations = [
        A.ImageCompression(quality_lower=80, quality_upper=100, p=0.25),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.5),
        A.RandomRotate90(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.CoarseDropout(
            num_holes_range=(1, 1),
            hole_height_range=(8, 32),
            hole_width_range=(8, 32),
            p=0.25,
        ),
    ]

    transforms_train = A.Compose([A.Resize(image_size, image_size)] + augmentations + _tail())
    transforms_val = A.Compose([A.Resize(image_size, image_size)] + _tail())
    return transforms_train, transforms_val

class UnNormalize(object):
    """Undo a per-channel mean/std normalization (``x * std + mean``), in place."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Restore the pre-normalization values of ``tensor``.

        Args:
            tensor (Tensor): image iterated along its first dimension, one slice
                per (mean, std) pair. The tensor is modified in place.
        Returns:
            Tensor: the same tensor object, un-normalized.
        """
        channel_stats = zip(self.mean, self.std)
        for channel, (channel_mean, channel_std) in zip(tensor, channel_stats):
            # Inverse of the forward normalization: sub_(mean).div_(std)
            channel.mul_(channel_std).add_(channel_mean)
        return tensor


unorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

def get_sampler(dataset, n_classes=None):
    """Build a class-balancing ``WeightedRandomSampler`` for ``dataset``.

    Labels are read without decoding images whenever possible:

    * a ``Subset``-like wrapper (has ``indices`` and ``dataset``) is resolved
      through its ``indices`` into the wrapped dataset's ``labels`` list;
    * a dataset with a ``labels`` attribute is read directly;
    * anything else falls back to ``dataset[i][1]``.

    Args:
        dataset: the dataset the sampler will draw indices for.
        n_classes: number of classes; defaults to the notebook-level
            ``num_classes``.

    Returns:
        WeightedRandomSampler drawing ``len(dataset)`` indices with replacement,
        each sample weighted by the inverse frequency of its class.

    Raises:
        ValueError: if the dataset is empty.
    """
    if n_classes is None:
        n_classes = num_classes  # notebook-level constant

    is_subset = hasattr(dataset, 'indices') and hasattr(dataset, 'dataset')
    if is_subset and getattr(dataset.dataset, 'labels', None) is not None:
        # Sampler position i refers to wrapped item dataset.indices[i].
        base_labels = dataset.dataset.labels
        labels = [int(base_labels[i]) for i in dataset.indices]
    elif not is_subset and getattr(dataset, 'labels', None) is not None:
        labels = [int(label) for label in dataset.labels]
    else:
        # Generic fallback: ask the dataset itself (may decode every sample).
        labels = [int(dataset[i][1]) for i in range(len(dataset))]

    if not labels:
        raise ValueError("Cannot build a sampler for an empty dataset")

    class_counts = np.bincount(np.asarray(labels, dtype=np.int64), minlength=n_classes)
    # The epsilon keeps classes that are absent from this split from dividing by zero.
    class_weights = 1.0 / (class_counts + 1e-6)
    sample_weights = [class_weights[label] for label in labels]
    return WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

# Modified model for embedding output
class EmbeddingModel(nn.Module):
    """Backbone + embedding head + linear classifier.

    Args:
        num_classes: number of output classes of the classifier.
        embedding_size: width of the embedding vector.
        backbone_name: timm model name used as the feature extractor.
        pretrained: whether to load timm's pretrained weights.
    """

    def __init__(self, num_classes, embedding_size=512, backbone_name='resnet101', pretrained=True):
        super().__init__()
        self.n_classes = num_classes
        self.embedding_size = embedding_size

        # num_classes=0 drops the classifier head but keeps global pooling, so
        # the backbone returns one (batch, num_features) vector per image.
        # features_only=True would instead return a list of 4-D feature maps,
        # which cannot be fed to the nn.Linear below.
        self.backbone = timm.create_model(
            backbone_name,
            pretrained=pretrained,
            num_classes=0,
        )

        # Width of the pooled feature vector produced by the backbone
        self.in_features = self.backbone.num_features

        # Embedding head
        self.embedding = nn.Sequential(
            nn.Linear(self.in_features, self.embedding_size),
            nn.BatchNorm1d(self.embedding_size),
            nn.ReLU(inplace=True)
        )

        # Classifier on top of the embedding (regular classification task)
        self.classifier = nn.Linear(self.embedding_size, self.n_classes)

    def forward(self, x, return_embeddings=False):
        """Run the network.

        Args:
            x: image batch accepted by the backbone.
            return_embeddings: when True, return only the embeddings.

        Returns:
            ``embeddings`` if ``return_embeddings`` is True, otherwise the tuple
            ``(logits, embeddings)``.
        """
        features = self.backbone(x)
        embeddings = self.embedding(features)

        if return_embeddings:
            return embeddings

        logits = self.classifier(embeddings)
        return logits, embeddings

# Plotting function
def plot_fold_history(fold, history):
    """Plot the AUC and loss curves recorded for one fold, side by side.

    Args:
        fold: zero-based fold index (shown as ``fold + 1`` in the titles).
        history: dict with per-epoch lists ``train_auc``, ``val_auc``,
            ``train_loss``, ``val_loss``, ``triplet_loss`` and the scalars
            ``best_val_auc``/``best_val_auc_epoch`` and
            ``best_val_loss``/``best_val_loss_epoch``.
    """
    epochs = range(1, len(history['train_auc']) + 1)
    plt.figure(figsize=(15, 5))

    # Left panel: AUC curves, best validation AUC highlighted
    plt.subplot(1, 2, 1)
    auc_curves = (
        ('train_auc', 'Train AUC', 'skyblue'),
        ('val_auc', 'Val AUC', 'lightcoral'),
    )
    for key, legend_label, colour in auc_curves:
        plt.plot(epochs, history[key], '-o', label=legend_label, color=colour)
    best_auc = history['best_val_auc']
    best_auc_epoch = history['best_val_auc_epoch']
    plt.scatter(best_auc_epoch, best_auc, s=200, color='lightcoral')
    plt.text(best_auc_epoch, best_auc, f'max {best_auc:.4f}', size=12)
    plt.xlabel('Epoch')
    plt.ylabel('auc')
    plt.title(f'Fold {fold + 1} Auc')
    plt.legend()

    # Right panel: loss curves, lowest validation loss highlighted
    plt.subplot(1, 2, 2)
    loss_curves = (
        ('train_loss', 'Train Loss', 'skyblue'),
        ('val_loss', 'Val Loss', 'lightcoral'),
        ('triplet_loss', 'Triplet Loss', 'green'),
    )
    for key, legend_label, colour in loss_curves:
        plt.plot(epochs, history[key], '-o', label=legend_label, color=colour)
    best_loss = history['best_val_loss']
    best_loss_epoch = history['best_val_loss_epoch']
    plt.scatter(best_loss_epoch, best_loss, s=200, color='lightcoral')
    plt.text(best_loss_epoch, best_loss, f'min {best_loss:.4f}', size=12)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Fold {fold + 1} Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a scalar."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the weighted sum, the weight total and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

class EarlyStopper:
    """Signals a stop once validation AUC has trailed its best value too often.

    Args:
        min_delta: an epoch only counts as "bad" when its AUC is more than
            ``min_delta`` below the best AUC seen so far.
        patience: number of bad epochs (since the last improvement) that
            triggers the stop.
    """

    def __init__(self, min_delta=0, patience=1):
        self.min_delta = min_delta
        self.patience = patience
        self.max_val_auc = -float('inf')
        self.count = 0

    def early_stop(self, val_auc):
        """Update the tracker with ``val_auc``; return True when training should stop."""
        # New best: remember it and forget earlier bad epochs.
        if self.max_val_auc < val_auc:
            self.max_val_auc = val_auc
            self.count = 0
            return False

        # Within min_delta of the best: neither an improvement nor a bad epoch.
        if not (self.max_val_auc > val_auc + self.min_delta):
            return False

        self.count += 1
        return self.count >= self.patience
        
# Custom GradualWarmupSchedulerV2
class GradualWarmupSchedulerV2:
    """Linear learning-rate warmup that hands over to an optional follow-up scheduler.

    While ``last_epoch <= total_epoch`` every ``step`` writes the rates from
    ``get_lr()`` into the optimizer's param groups. Once ``last_epoch`` exceeds
    ``total_epoch`` and an ``after_scheduler`` was given, that scheduler's
    ``base_lrs`` are overwritten once with ``base_lr * multiplier`` and each
    ``step`` is forwarded to it with the epoch counted from the end of warmup.
    Without an ``after_scheduler``, ``step`` leaves the learning rates untouched
    after warmup.
    """
    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        """
        Args:
            optimizer: object exposing ``param_groups``, a list of dicts with an ``'lr'`` entry.
            multiplier: warmup target factor; see ``get_lr`` for the two ramp shapes.
            total_epoch: last epoch index that still belongs to the warmup phase.
            after_scheduler: optional scheduler exposing ``base_lrs``, ``step(epoch)``
                and ``get_lr()``; it takes over after warmup.
        """
        self.optimizer = optimizer
        self.multiplier = multiplier
        self.total_epoch = total_epoch
        self.after_scheduler = after_scheduler
        # Set once the after_scheduler's base_lrs have been rescaled
        self.finished = False
        # -1 so that the first argument-less step() lands on epoch 0
        self.last_epoch = -1
        # Snapshot of the learning rates the optimizer holds at construction time
        self.base_lrs = [group['lr'] for group in optimizer.param_groups]
    
    def step(self, epoch=None):
        """Advance to ``epoch`` (or by one epoch when ``epoch`` is None) and update the LRs."""
        if epoch is None:
            self.last_epoch += 1
        else:
            self.last_epoch = epoch
        
        if self.last_epoch <= self.total_epoch:
            # Warmup phase: write the ramped rates straight into the optimizer
            for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
                param_group['lr'] = lr
        elif self.after_scheduler:
            # Hand-over: rescale the follow-up scheduler's base rates exactly once,
            # then drive it with the number of epochs elapsed since warmup ended
            if not self.finished:
                self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                self.finished = True
            self.after_scheduler.step(self.last_epoch - self.total_epoch)
    
    def get_lr(self):
        """Return the list of learning rates (one per param group) for ``last_epoch``.

        After warmup this defers to ``after_scheduler.get_lr()`` when available,
        otherwise it returns ``base_lr * multiplier``. During warmup the ramp is
        linear in ``last_epoch``: from 0 up to ``base_lr`` when ``multiplier == 1.0``,
        otherwise from ``base_lr`` up to ``base_lr * multiplier``.
        """
        if self.last_epoch > self.total_epoch:
            if self.after_scheduler:
                # Same one-time rescale as in step(), in case get_lr() is called first
                if not self.finished:
                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]
        # NOTE(review): both ramps divide by total_epoch, so total_epoch == 0 raises
        # ZeroDivisionError during warmup.
        if self.multiplier == 1.0:
            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
        else:
            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

In [5]:
# Inspect the layer-by-layer output shapes of the ResNet-101 feature extractor.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = timm.create_model(
            'resnet101',
            pretrained=True,
            features_only=True,  # Remove classifier head
        ).to(device)
# torchsummary creates its dummy input on `device` (default "cuda"); pass the
# device actually in use so the summary also works with the CPU fallback above.
summary(model, input_size=(3, 512, 512), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           9,408
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
         MaxPool2d-4         [-1, 64, 128, 128]               0
            Conv2d-5         [-1, 64, 128, 128]           4,096
       BatchNorm2d-6         [-1, 64, 128, 128]             128
              ReLU-7         [-1, 64, 128, 128]               0
            Conv2d-8         [-1, 64, 128, 128]          36,864
       BatchNorm2d-9         [-1, 64, 128, 128]             128
         Identity-10         [-1, 64, 128, 128]               0
             ReLU-11         [-1, 64, 128, 128]               0
         Identity-12         [-1, 64, 128, 128]               0
           Conv2d-13        [-1, 256, 128, 128]          16,384
      BatchNorm2d-14        [-1, 256, 1

In [None]:
def train_epoch(model, loader, optimizer, criterion, triplet_loss, miner, device):
    """Run one optimisation pass over ``loader``.

    For every batch the cross-entropy loss on the logits and the triplet loss on
    the mined embeddings are summed and back-propagated.

    Args:
        model: module returning ``(logits, embeddings)`` for a batch.
        loader: iterable of ``(images, labels)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: classification loss called as ``criterion(logits, targets)``.
        triplet_loss: called as ``triplet_loss(embeddings, targets, mined)``.
        miner: called as ``miner(embeddings, targets)``.
        device: device the batches are moved to.

    Returns:
        ``(mean total loss, epoch AUC, mean triplet loss, mean CE loss)``; the AUC
        is 0.0 when ``roc_auc_score`` raises ``ValueError``.
    """
    total_meter, trip_meter, ce_meter = AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    prob_batches, target_batches = [], []

    for img, label in loader:
        optimizer.zero_grad()
        inputs, targets = img.to(device), label.to(device)

        # Forward pass yields both the class scores and the embedding vectors
        logits, embeddings = model(inputs)

        ce_loss = criterion(logits, targets)
        mined = miner(embeddings, targets)
        trip_loss = triplet_loss(embeddings, targets, mined)
        loss = ce_loss + trip_loss

        loss.backward()
        optimizer.step()

        n_samples = inputs.size(0)
        total_meter.update(loss.item(), n_samples)
        ce_meter.update(ce_loss.item(), n_samples)
        trip_meter.update(trip_loss.item() if trip_loss != 0 else 0, n_samples)

        # Keep per-batch probabilities and targets for the epoch-level AUC
        with torch.no_grad():
            prob_batches.append(F.softmax(logits.float(), dim=1).cpu().numpy())
            target_batches.append(targets.cpu().numpy())

    PROBS = np.concatenate(prob_batches)
    TARGETS = np.concatenate(target_batches)

    # Sanity checks: report, but do not abort, on malformed predictions/targets
    if not np.allclose(PROBS.sum(axis=1), 1.0, atol=1e-5):
        print("PROBS not summing to 1!")
    if np.any(TARGETS < 0) or np.any(TARGETS >= num_classes):
        print(f"Invalid TARGETS values: {TARGETS}")

    # One-vs-rest AUC over the whole epoch
    try:
        train_auc = roc_auc_score(y_true=TARGETS, y_score=PROBS, multi_class='ovr')
    except ValueError as e:
        print(f"Sample of PROBS: {PROBS[0]}, sum: {PROBS.sum(axis=1)}")
        print(f"Error: {e}")
        train_auc = 0.0

    return total_meter.avg, train_auc, trip_meter.avg, ce_meter.avg

# Validation epoch
def val_epoch(model, loader, criterion, triplet_loss, miner, device):
    """Evaluate ``model`` on ``loader`` without updating its weights.

    The forward pass and both losses run under ``autocast()`` with gradients
    disabled.

    Args:
        model: module returning ``(logits, embeddings)`` for a batch.
        loader: iterable of ``(images, labels)`` batches.
        criterion: classification loss called as ``criterion(logits, targets)``.
        triplet_loss: called as ``triplet_loss(embeddings, targets, mined)``.
        miner: called as ``miner(embeddings, targets)``.
        device: device the batches are moved to.

    Returns:
        ``(mean total loss, AUC, mean triplet loss, mean CE loss)``; the AUC is
        0.0 when ``roc_auc_score`` raises ``ValueError``.
    """
    model.eval()
    total_meter, trip_meter, ce_meter = AverageMeter(), AverageMeter(), AverageMeter()

    prob_batches, target_batches = [], []

    with torch.no_grad():
        for img, label in loader:
            inputs, targets = img.to(device), label.to(device)

            with autocast():
                logits, embeddings = model(inputs)
                ce_loss = criterion(logits, targets)
                mined = miner(embeddings, targets)
                trip_loss = triplet_loss(embeddings, targets, mined)
                loss = ce_loss + trip_loss

            n_samples = inputs.size(0)
            total_meter.update(loss.item(), n_samples)
            ce_meter.update(ce_loss.item(), n_samples)
            trip_meter.update(trip_loss.item() if trip_loss != 0 else 0, n_samples)

            # Softmax in float32 regardless of the autocast dtype of the logits
            prob_batches.append(F.softmax(logits.float(), dim=1).cpu().numpy())
            target_batches.append(targets.cpu().numpy())

    PROBS = np.concatenate(prob_batches)
    TARGETS = np.concatenate(target_batches)

    try:
        val_auc = roc_auc_score(TARGETS, PROBS, multi_class='ovr')
    except ValueError as e:
        print(f"Val AUC failed: {e}, Unique targets: {np.unique(TARGETS)}, Probs shape: {PROBS.shape}")
        val_auc = 0.0

    return total_meter.avg, val_auc, trip_meter.avg, ce_meter.avg

In [None]:


def run(fold, df, root_dir, test_df, transforms_train, transforms_val, num_workers, n_epochs):
    """Train one cross-validation fold and produce out-of-fold / test predictions.

    Reads the notebook-level settings ``batch_size``, ``lr``, ``device`` and
    ``num_classes``. Writes ``fold_{fold}_best_auc.pth``, ``fold_{fold}_final.pth``,
    ``fold_{fold}_embeddings.npy`` and, when a test set is given,
    ``fold_{fold}_test_embeddings.npy`` to the working directory.

    Args:
        fold: value of ``df['fold']`` held out for validation.
        df: training table with ``'fold'``, ``'id'`` and the columns read by
            ``TripletDataset`` (``'path'``, ``'label'``).
        root_dir: directory the image paths are relative to.
        test_df: table of unlabeled test images with a ``'path'`` column, or
            ``None`` to skip test inference.
        transforms_train: augmentation pipeline for the training split.
        transforms_val: deterministic pipeline for validation and test images.
        num_workers: DataLoader worker count (``None`` is treated as 0).
        n_epochs: maximum number of training epochs.

    Returns:
        ``(oof_preds, oof_targets, oof_names, oof_folds, test_preds, oof_embeddings)``;
        ``test_preds`` is ``None`` when ``test_df`` is ``None``.
    """
    train_df = df[df['fold'] != fold].reset_index(drop=True)
    val_df = df[df['fold'] == fold].reset_index(drop=True)

    # Datasets (validation uses mode 'train' so that labels are returned)
    train_ds = TripletDataset(root_dir, train_df, 'train', transform=transforms_train)
    val_ds = TripletDataset(root_dir, val_df, 'train', transform=transforms_val)

    # Data loaders. prefetch_factor is only valid with worker processes.
    num_workers = num_workers or 0
    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    if num_workers > 0:
        loader_kwargs['prefetch_factor'] = 2

    # A trailing batch holding a single sample makes BatchNorm1d raise in
    # training mode; drop it in exactly that situation.
    drop_single_tail = len(train_ds) > batch_size and len(train_ds) % batch_size == 1
    train_loader = DataLoader(train_ds, shuffle=True, drop_last=drop_single_tail, **loader_kwargs)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

    # Model, optimizer, criterion
    model = EmbeddingModel(num_classes=num_classes, embedding_size=128).to(device)
    optimizer = AdamW(model.parameters(), lr=lr)

    # Classification loss
    criterion = nn.CrossEntropyLoss()

    # Distance function shared by the miner and the triplet loss
    distance = distances.CosineSimilarity()

    # Batch hard miner: hardest positive and hardest negative per anchor
    miner = miners.BatchHardMiner(
        pos_strategy="hard",
        neg_strategy="hard",
        distance=distance
    )

    # Triplet loss with margin
    triplet_loss = losses.TripletMarginLoss(
        margin=0.3,
        distance=distance,
        reducer=reducers.AvgNonZeroReducer()
    )

    # Learning rate schedule: linear warmup, then cosine annealing with restarts.
    # NOTE(review): with the warmup actually stepped, the LR peaks at
    # lr * multiplier before the cosine phase starts; confirm this is intended.
    scheduler_cosine = CosineAnnealingWarmRestarts(optimizer, T_0=8)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=2, after_scheduler=scheduler_cosine)

    # History tracking. best_val_auc starts at -inf so the first epoch always
    # writes a checkpoint for the reload below, whatever its AUC.
    history = {
        'train_loss': [], 'val_loss': [],
        'train_auc': [], 'val_auc': [],
        'triplet_loss': [], 'ce_loss': [],
        'learning_rates': [],
        'best_val_auc': -float('inf'), 'best_val_auc_epoch': 0,
        'best_val_loss': float('inf'), 'best_val_loss_epoch': 0
    }

    print(f"Fold {fold + 1}: =========================================")

    early_stopping_active = False
    es = EarlyStopper(min_delta=1e-3, patience=2)

    for epoch in range(1, n_epochs + 1):
        current_lr = optimizer.param_groups[0]['lr']
        history['learning_rates'].append(current_lr)

        print(f"\nEP {epoch}/{n_epochs} (LR: {current_lr:.6f}):")
        train_loss, train_auc, train_triplet_loss, train_ce_loss = train_epoch(model, train_loader, optimizer, criterion, triplet_loss, miner, device)
        val_loss, val_auc, val_triplet_loss, val_ce_loss = val_epoch(model, val_loader, criterion, triplet_loss, miner, device)

        print(f"Train AUC: {train_auc:.4f}, CE Loss: {train_ce_loss:.4f}, Triplet Loss: {train_triplet_loss:.4f}")
        print(f"Val AUC: {val_auc:.4f}, CE Loss: {val_ce_loss:.4f}, Triplet Loss: {val_triplet_loss:.4f}")

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_auc'].append(train_auc)
        history['val_auc'].append(val_auc)
        history['triplet_loss'].append(train_triplet_loss)
        history['ce_loss'].append(train_ce_loss)

        if val_auc > history['best_val_auc']:
            history['best_val_auc'] = val_auc
            history['best_val_auc_epoch'] = epoch
            torch.save(model.state_dict(), f'fold_{fold}_best_auc.pth')
            print(f"New best AUC! Model saved.")

        if val_loss < history['best_val_loss']:
            history['best_val_loss'] = val_loss
            history['best_val_loss_epoch'] = epoch

        # Step the warmup wrapper (not the cosine scheduler directly): it ramps
        # the LR for the first total_epoch epochs and then forwards to the
        # cosine scheduler by itself.
        scheduler_warmup.step(epoch)

        if epoch == scheduler_warmup.total_epoch:
            early_stopping_active = True
            # Reset early stopper to forget the potentially misleading high scores during warmup
            es = EarlyStopper(min_delta=1e-3, patience=2)
            print("Warmup complete. Early stopping now active.")

        # Only check early stopping if it's active
        if early_stopping_active:
            if es.early_stop(val_auc):
                print(f"Early stopping triggered at epoch {epoch}")
                break

    torch.save(model.state_dict(), f'fold_{fold}_final.pth')
    plot_fold_history(fold, history)

    # Compute OOF predictions with the best-AUC checkpoint
    model.load_state_dict(torch.load(f'fold_{fold}_best_auc.pth', map_location=device))
    model.eval()
    oof_preds = []
    oof_targets = []
    oof_embeddings = []
    with torch.no_grad():
        for img, label in val_loader:
            inputs = img.to(device)
            targets = label.to(device)
            logits, embeddings = model(inputs)
            probs = F.softmax(logits, dim=1).cpu().numpy()
            oof_preds.append(probs)
            oof_targets.append(targets.cpu().numpy())
            oof_embeddings.append(embeddings.cpu().numpy())

    oof_preds = np.concatenate(oof_preds)
    oof_targets = np.concatenate(oof_targets)
    oof_embeddings = np.concatenate(oof_embeddings)

    # Save embeddings for later visualization or analysis
    np.save(f'fold_{fold}_embeddings.npy', oof_embeddings)

    # Compute test predictions (skipped when no test table was supplied)
    test_preds = None
    if test_df is not None:
        test_ds = TripletDataset(root_dir, test_df, 'test', transform=transforms_val)
        test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)
        test_preds = []
        test_embeddings = []
        with torch.no_grad():
            for img in test_loader:
                inputs = img.to(device)
                logits, embeddings = model(inputs)
                probs = F.softmax(logits, dim=1).cpu().numpy()
                test_preds.append(probs)
                test_embeddings.append(embeddings.cpu().numpy())

        test_preds = np.concatenate(test_preds)
        test_embeddings = np.concatenate(test_embeddings)

        # Save test embeddings
        np.save(f'fold_{fold}_test_embeddings.npy', test_embeddings)

    oof_names = val_df['id'].values
    oof_folds = np.full(len(oof_targets), fold)
    return oof_preds, oof_targets, oof_names, oof_folds, test_preds, oof_embeddings

# Visualization function for embeddings using t-SNE
def visualize_embeddings(embeddings, labels, title="t-SNE Visualization of Embeddings"):
    """Project embeddings to 2-D with t-SNE and scatter-plot them per class.

    The figure is saved as ``<title with spaces replaced by underscores>.png``
    in the working directory and then shown.

    Args:
        embeddings: array-like of shape (n_samples, n_features).
        labels: array-like of length n_samples with the class of each row.
        title: plot title; also used to derive the output file name.
    """
    from sklearn.manifold import TSNE

    embeddings = np.asarray(embeddings)
    # A plain list would turn `labels == class_idx` into a single bool
    labels = np.asarray(labels)

    n_samples = embeddings.shape[0]
    if n_samples < 2:
        print(f"Skipping t-SNE for '{title}': need at least 2 samples, got {n_samples}")
        return

    # t-SNE requires perplexity < n_samples; keep the library default of 30
    # whenever the data is large enough and shrink it for small folds.
    perplexity = min(30.0, float(n_samples - 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    embeddings_2d = tsne.fit_transform(embeddings)

    # Plot one scatter series per class
    fig = plt.figure(figsize=(10, 8))
    for class_idx in np.unique(labels):
        class_mask = labels == class_idx
        plt.scatter(
            embeddings_2d[class_mask, 0],
            embeddings_2d[class_mask, 1],
            label=f'Class {class_idx}'
        )

    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'{title.replace(" ", "_")}.png')
    plt.show()
    # Release the figure; this function is called once per fold
    plt.close(fig)

# ---------------------------------------------------------------------------
# Cross-validation driver: trains every fold, gathers OOF predictions and
# embeddings, and (when a test set is configured) averages test predictions.
# ---------------------------------------------------------------------------
IMG_SIZE = 256
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 32
n_epochs = 15
num_workers = os.cpu_count() or 0  # os.cpu_count() may return None
print(f"Num workers = {num_workers}")
folds = [0, 1, 2, 3, 4]
lr = 3e-4

# NOTE(review): no earlier cell loads a test table. Set csv_test_file to the
# test CSV under root_dir (it needs 'path' and 'id' columns) to enable test
# inference and the submission file; with None both are skipped.
csv_test_file = None
test_df = pd.read_csv(os.path.join(root_dir, csv_test_file)) if csv_test_file else None

oof_preds_all = []
oof_targets_all = []
oof_names_all = []
oof_folds_all = []
oof_embeddings_all = []
test_preds_per_fold = []

transforms_train, transforms_val = get_transforms(IMG_SIZE)

for fold in folds:
    # run() takes test_df as its 4th positional argument
    oof_preds, oof_targets, oof_names, oof_folds, test_preds, oof_embeddings = run(
        fold, df, root_dir, test_df, transforms_train, transforms_val, num_workers, n_epochs=n_epochs
    )
    oof_preds_all.append(oof_preds)
    oof_targets_all.append(oof_targets)
    oof_names_all.append(oof_names)
    oof_folds_all.append(oof_folds)
    oof_embeddings_all.append(oof_embeddings)
    if test_preds is not None:
        test_preds_per_fold.append(test_preds)

    # Visualize embeddings for this fold
    visualize_embeddings(oof_embeddings, oof_targets, f"Fold {fold} Embeddings")

# Concatenate OOF data
oof_preds_all = np.concatenate(oof_preds_all)
oof_targets_all = np.concatenate(oof_targets_all)
oof_names_all = np.concatenate(oof_names_all)
oof_folds_all = np.concatenate(oof_folds_all)
oof_embeddings_all = np.concatenate(oof_embeddings_all)

# Visualize all embeddings together
visualize_embeddings(oof_embeddings_all, oof_targets_all, "All Folds Embeddings")

# Compute overall OOF AUC
auc = roc_auc_score(oof_targets_all, oof_preds_all, multi_class='ovr')
print(f'Overall OOF AUC = {auc:.3f}')

# Save OOF to CSV
df_oof = pd.DataFrame({
    'image_name': oof_names_all,
    'target': oof_targets_all,
    'pred': oof_preds_all.argmax(axis=1),
    'fold': oof_folds_all
})
df_oof.to_csv('oof_triplet.csv', index=False)
print("OOF saved to 'oof_triplet.csv'")
print(df_oof.head())

if test_preds_per_fold:
    # Stack to (n_test, n_classes, n_folds) and average over the fold axis;
    # stacking by position does not assume that fold ids are 0..k-1.
    test_preds_all = np.stack(test_preds_per_fold, axis=2)
    test_preds_final = test_preds_all.mean(axis=2)

    # Save test predictions
    submission_df = pd.DataFrame({
        'id': test_df['id'],
        'class': np.argmax(test_preds_final, axis=1)
    })
    submission_df.to_csv('submission_triplet.csv', index=False)
    print("Submission saved to 'submission_triplet.csv'")
else:
    print("No test predictions available; skipping 'submission_triplet.csv'")