In [33]:
import logging
from IPython.core.interactiveshell import InteractiveShell
%load_ext autoreload
InteractiveShell.ast_node_interactivity = "all"

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
import pandas as pd
import numpy as np
import os
import sys
import matplotlib.pyplot as plt

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load data

# Training

In [35]:
import torch
from torch.utils.data import Dataset, DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"

In [36]:
from peak_detection_2d.model import PeakDetectionNet
from torchinfo import summary

net = PeakDetectionNet(1, 32)

summary(
    model=net,
    # (batch_size, color_channels, height, width)
    input_size=[(8, 1, 180, 180), (8, 2)],
    col_names=["input_size", "output_size", "num_params"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #
PeakDetectionNet (PeakDetectionNet)      [8, 1, 180, 180]     [8, 4]               --
├─Sequential (conv)                      [8, 1, 180, 180]     [8, 4]               --
│    └─ResBlock (0)                      [8, 1, 180, 180]     [8, 32, 180, 180]    --
│    │    └─Sequential (base1)           [8, 1, 180, 180]     [8, 1, 180, 180]     12
│    │    └─Sequential (base2)           [8, 1, 180, 180]     [8, 32, 180, 180]    384
│    └─MaxPool2d (1)                     [8, 32, 180, 180]    [8, 32, 90, 90]      --
│    └─ResBlock (2)                      [8, 32, 90, 90]      [8, 64, 90, 90]      --
│    │    └─Sequential (base1)           [8, 32, 90, 90]      [8, 32, 90, 90]      9,312
│    │    └─Sequential (base2)           [8, 32, 90, 90]      [8, 64, 90, 90]      18,624
│    └─MaxPool2d (3)                     [8, 64, 90, 90]      [8, 64, 45, 45]      --
│    └─ResBlock (4)                      

In [46]:
%autoreload 2
from peak_detection_2d.loss import WeightedBoundingBoxIoULoss
import logging
import json
import os

import torch
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision.transforms import Compose

from peak_detection_2d.utils import (
    plot_sample_predictions,
    plot_history,
)
from peak_detection_2d.dataset import MultiHDF5Dataset, ToTensor, Padding
from peak_detection_2d.model import (
    PeakDetectionNet,
    train_val_step,
    train_val_step_wiou,
)

logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# experiment specific
result_parent_dir = "/cmnfs/proj/ORIGINS/data/brain/FreshFrozenBrain/SingleShot/DDA/"
result_base_dir = "frame0_1830_ssDDA_P064428_Fresh1_5ug_R1_BD5_1_4921_ScanByScan_RTtol0.9_threshold_missabthres0.5_convergence_NoIntercept_pred_mzBinDigits2_imPeakWidth4_deltaMobilityThres80"
result_dir = os.path.join(result_parent_dir, result_base_dir)
peak_selection_dir = os.path.join(result_dir, "peak_selection_model_1out64_lr01")
if not os.path.exists(peak_selection_dir):
    os.makedirs(peak_selection_dir)

num_epoch = 5
patience = 10
inital_lr = 0.001
batch_size = 64

random_state = 42
device = "cuda" if torch.cuda.is_available() else "cpu"
logging.info("Using device: %s", device)
model = PeakDetectionNet(1, 32).to(device)

loss_func_val = WeightedBoundingBoxIoULoss(reduction="mean", add_diou=False, add_smooth_l1=False)
loss_func_train = WeightedBoundingBoxIoULoss(reduction="mean", add_diou=False, add_smooth_l1=True)
loss_func_l1 = nn.SmoothL1Loss(reduction="mean")
optimizer = torch.optim.Adam(model.parameters(), lr=inital_lr)
scheduler = ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=3, min_lr=0.000001
)

loss_tracking = {"train": [], "val": []}
iou_tracking = {"train": [], "val": []}
best_loss = float("inf")


with open(os.path.join(result_dir, "param.json"), mode="r", encoding="utf-8") as file:
    config = json.load(file)

hdf5_files = [
    os.path.join(os.path.join(result_dir, "peak_detection_data"), file)
    for file in os.listdir(os.path.join(result_dir, "peak_detection_data"))
    if file.endswith(".h5")
]

# Define transformations (if any)
transformation = Compose([Padding((180, 180)), ToTensor(scale_label=False)])

# Create the dataset
dataset = MultiHDF5Dataset(hdf5_files, transforms=transformation)

# Split the dataset into training and testing sets
train_val_dataset, test_dataset = dataset.split_dataset(
    train_ratio=0.2, seed=random_state
)
train_dataset, val_dataset = train_val_dataset.split_dataset(
    train_ratio=0.8, seed=random_state
)
logging.info("Train dataset size: %d", len(train_dataset))
logging.info("Validation dataset size: %d", len(val_dataset))
logging.info("Test dataset size: %d", len(test_dataset))
# Example usage
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=False
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1024, shuffle=False
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=1024, shuffle=False
)


for epoch in range(num_epoch):
    logging.info("Epoch %d/%d", epoch + 1, num_epoch)

    training_loss, trainig_iou = train_val_step_wiou(
        train_dataloader, model, loss_func_train, optimizer
    )
    loss_tracking["train"].append(training_loss)
    iou_tracking["train"].append(trainig_iou)

    with torch.inference_mode():
        val_loss, val_iou = train_val_step_wiou(val_dataloader, model, loss_func_val, None)
        loss_tracking["val"].append(val_loss)
        iou_tracking["val"].append(val_iou)
        if val_loss < best_loss:
            logging.info("Saving best model")
            torch.save(
                model.state_dict(), os.path.join(peak_selection_dir, "best_model.pt")
            )
            best_loss = val_loss
            current_patience = patience
        else:
            current_patience -= 1
            if current_patience == 0:
                logging.info("Early stopping")
                break
        scheduler.step(val_loss)
        logging.info(
            "Last learning rate: %s",
            scheduler.get_last_lr(),
        )

    logging.info("Training loss: %.6f, IoU: %.2f", training_loss, trainig_iou)
    logging.info("Validation loss: %.6f, IoU: %.6f", val_loss, val_iou)

# Plot history
with open(os.path.join(peak_selection_dir, "loss.json"), "w", encoding="utf-8") as fp:
    json.dump(loss_tracking, fp)
with open(os.path.join(peak_selection_dir, "iou.json"), "w", encoding="utf-8") as fp:
    json.dump(iou_tracking, fp)

plot_history(loss_tracking, "loss", save_dir=peak_selection_dir)
plot_history(iou_tracking, "iou", save_dir=peak_selection_dir)

# Plot sample predictions
plot_sample_predictions(
    test_dataset,
    model=model,
    n=5,
    save_dir=os.path.join(peak_selection_dir, "sample_predictions"),
)

2024-05-28 15:52:02,348 - root - INFO - Using device: cuda


2024-05-28 15:52:02,396 - root - INFO - Train dataset size: 2300
2024-05-28 15:52:02,397 - root - INFO - Validation dataset size: 575
2024-05-28 15:52:02,398 - root - INFO - Test dataset size: 11504
2024-05-28 15:52:02,400 - root - INFO - Epoch 1/5
2024-05-28 15:52:02,401 - peak_detection_2d.model - INFO - model is in training mode
2024-05-28 15:52:02,693 - peak_detection_2d.loss - INFO - Union box: x1: tensor(-8.7384, device='cuda:0', grad_fn=<SelectBackward0>), y1: tensor(5.2603, device='cuda:0', grad_fn=<SelectBackward0>), x2: tensor(69., device='cuda:0', grad_fn=<SelectBackward0>), y2: tensor(95., device='cuda:0', grad_fn=<SelectBackward0>)
2024-05-28 15:52:02,699 - peak_detection_2d.loss - INFO - Union box: x1: tensor(-3.8303, device='cuda:0', grad_fn=<SelectBackward0>), y1: tensor(8.1944, device='cuda:0', grad_fn=<SelectBackward0>), x2: tensor(76., device='cuda:0', grad_fn=<SelectBackward0>), y2: tensor(66., device='cuda:0', grad_fn=<SelectBackward0>)
2024-05-28 15:52:02,703 - pe

In [43]:
plot_sample_predictions(
    test_dataset,
    model=model,
    n=10,
    save_dir=os.path.join(peak_selection_dir, "sample_predictions"),
)

2024-05-28 15:39:52,362 - peak_detection_2d.utils - INFO - Output: tensor([[-59.8907, -13.3338,  18.1896,  61.5581]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
2024-05-28 15:39:52,367 - peak_detection_2d.loss - INFO - Union box: x1: tensor(-59.8907, device='cuda:0', grad_fn=<SelectBackward0>), y1: tensor(-13.3338, device='cuda:0', grad_fn=<SelectBackward0>), x2: tensor(295., device='cuda:0', grad_fn=<SelectBackward0>), y2: tensor(145., device='cuda:0', grad_fn=<SelectBackward0>)
2024-05-28 15:39:52,371 - peak_detection_2d.loss - INFO - weighted iou mean: tensor(0., device='cuda:0')
2024-05-28 15:39:52,372 - peak_detection_2d.loss - INFO - wiou loss: 1.0
2024-05-28 15:39:53,172 - peak_detection_2d.utils - INFO - Output: tensor([[-165.8776, -110.0317,   49.5585,  210.1995]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
2024-05-28 15:39:53,177 - peak_detection_2d.loss - INFO - weighted iou mean: tensor(0.1731, device='cuda:0')
2024-05-28 15:39:53,179 - peak_detection_2d.loss 

In [32]:
import gc

torch.cuda.empty_cache()
gc.collect()

31

In [None]:
from peak_detection_2d.utils import plot_data_points

# Sample n datapoints from test_dataset
n = 5
sample_indices = np.random.choice(len(test_dataset), n, replace=False)
for i in sample_indices:
    image, hint, label = test_dataset[i]
    output = model(image.unsqueeze(0).float(), hint.unsqueeze(0).float())
    iou = iou_batch(output, label.unsqueeze(0))
    to_plot = {"data": image[0].cpu(), "hint_idx": hint.cpu(), "bbox": label.cpu()}
    plot_data_points(to_plot, pred_bbox=output[0].cpu().detach().numpy(), zoom_in=True)
    plt.title(f"IoU: {iou:.2f}")
    plt.savefig(f"sample_{i}.png", dpi=300)
    plt.close()