In [1]:
import os 
import sys

# Resolve project paths relative to the kernel's working directory.
# NOTE: the project root is taken to be the parent of the current working
# directory, so the kernel must be started from a folder directly under it.
current_file_dir = os.getcwd()
root_dir = os.path.dirname(current_file_dir)

# Make the project root importable (required by the `src.*` imports below).
# The membership guard keeps sys.path free of duplicates when the cell is
# executed more than once.
if root_dir not in sys.path:
    sys.path.append(root_dir)

dataset_dir = os.path.join(root_dir, 'data')
processed_dir = os.path.join(dataset_dir, 'processed')

# exist_ok=True creates the directory when missing and is a no-op otherwise,
# replacing the racy "check, then create" sequence.
os.makedirs(processed_dir, exist_ok=True)


from src.utils.preprocess import create_pointcloud_image, create_morphological_polygon
from src.model import UNetModel, UNetConfig, create_unet_model
from src.utils.dataset import MultiViewImageDataset, JustCAM
from src.utils.loss import DiceLoss, FocalLoss, CombinedLoss, IoULoss
from src.utils.metrics import SegmentationMetrics
from torchvision import transforms
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [3]:
# Root folder of the rendered lidarseg images.
# Derived from `processed_dir` instead of re-assigning `dataset_dir` from its
# own previous value: the result is the same on a first run, and re-running
# this cell no longer appends 'processed/lidarseg_images' a second time.
dataset_dir = os.path.join(processed_dir, 'lidarseg_images')

# Input images: convert to tensor, then normalise every channel with
# mean 0.5 and std 0.5.
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# Target masks: reduced to a single channel before tensor conversion.
mask_transform = transforms.Compose([
    # convert RGB to grayscale
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# Data-loading hyper-parameters shared by the cells below.
batch_size = 24
num_workers = 16
# NOTE(review): presumably (width, height) - the model is later built with
# input_width=398 / input_height=224; confirm against the dataset class.
image_size = (398,224)


In [4]:
import pandas as pd
import copy
# Folder holding the regular (non-occluded) samples and their split file.
trainval_dir = os.path.join(dataset_dir, 'trainval')

normal_dataset = MultiViewImageDataset(
    root_dir=trainval_dir,
    input_transform=input_transform,
    mask_transform=mask_transform,
    image_size=image_size,
)

# trainval.csv carries a `trainable` flag per file:
# 1 -> goes to the training list, 0 -> goes to the validation list.
train_valid_df = pd.read_csv(os.path.join(trainval_dir, 'trainval.csv'))
is_train_row = train_valid_df['trainable'] == 1
is_valid_row = train_valid_df['trainable'] == 0
trainables = train_valid_df.loc[is_train_row, 'filename'].tolist()
validables = train_valid_df.loc[is_valid_row, 'filename'].tolist()

# Each split works on its own deep copy of the full dataset and is then
# handed its file list (presumably restricting the copy to those files -
# confirm in MultiViewImageDataset.update_image_names).
train_normal_dataset = copy.deepcopy(normal_dataset)
train_normal_dataset.update_image_names(trainables)

valid_normal_dataset = copy.deepcopy(normal_dataset)
valid_normal_dataset.update_image_names(validables)

print(f"Normal dataset size: {len(train_normal_dataset)}")
print(f"Validation dataset size: {len(valid_normal_dataset)}")






# JustCAM variant of the 'trainval' data, built with the same transforms and
# image size as the regular dataset.
just_cam_kwargs = dict(
    root_dir=os.path.join(dataset_dir, 'trainval'),
    input_transform=input_transform,
    mask_transform=mask_transform,
    image_size=image_size,
)
just_cam_dataset = JustCAM(**just_cam_kwargs)

# Reuse the file lists read from trainval.csv so this variant follows the
# same train/validation assignment as the regular dataset.
train_just_cam_dataset = copy.deepcopy(just_cam_dataset)
train_just_cam_dataset.update_image_names(trainables)

valid_just_cam_dataset = copy.deepcopy(just_cam_dataset)
valid_just_cam_dataset.update_image_names(validables)




# Folder holding the occluded samples; it ships its own trainval.csv.
occluded_dir = os.path.join(dataset_dir, 'occluded')

occluded_dataset = MultiViewImageDataset(
    root_dir=occluded_dir,
    input_transform=input_transform,
    mask_transform=mask_transform,
    image_size=image_size,
)

# Same convention as the regular split file: trainable == 1 -> training list,
# trainable == 0 -> validation list.
train_valid_occluded_df = pd.read_csv(os.path.join(occluded_dir, 'trainval.csv'))
occluded_train_rows = train_valid_occluded_df['trainable'] == 1
occluded_valid_rows = train_valid_occluded_df['trainable'] == 0
trainables_occluded = train_valid_occluded_df.loc[occluded_train_rows, 'filename'].tolist()
validables_occluded = train_valid_occluded_df.loc[occluded_valid_rows, 'filename'].tolist()

# One deep copy per split, each given its own file list.
train_occluded_dataset = copy.deepcopy(occluded_dataset)
train_occluded_dataset.update_image_names(trainables_occluded)

valid_occluded_dataset = copy.deepcopy(occluded_dataset)
valid_occluded_dataset.update_image_names(validables_occluded)

print(f"Occluded dataset size: {len(train_occluded_dataset)}")
print(f"Validation dataset size: {len(valid_occluded_dataset)}")




# Merge the three sources (regular, occluded, camera-only) into one training
# set and one validation set. The `+` operator is applied left to right, in
# the order: regular, occluded, camera-only.
train_dataset = (
    train_normal_dataset
    + train_occluded_dataset
    + train_just_cam_dataset
)
valid_dataset = (
    valid_normal_dataset
    + valid_occluded_dataset
    + valid_just_cam_dataset
)

# Report the merged sizes as a quick sanity check.
print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(valid_dataset)}")

Normal dataset size: 27319
Validation dataset size: 6830
Occluded dataset size: 13112
Validation dataset size: 3279
Train dataset size: 67750
Validation dataset size: 16939


In [5]:
# Settings shared by the training and validation loaders.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
)

# Training batches are reshuffled; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)


In [6]:
# U-Net configuration.
# - 6 input channels: matches the camera + point-cloud tensors that the
#   training loop concatenates along the channel dimension.
# - 1 output channel: matches the single-channel masks produced by
#   `mask_transform`.
# - 224 x 398 (height x width): the same two numbers as `image_size`.
unet_kwargs = {
    "in_channels": 6,
    "out_channels": 1,
    "input_height": 224,
    "input_width": 398,
}
model = create_unet_model(**unet_kwargs)

In [7]:
# Select the compute device: use the GPU when PyTorch reports CUDA as
# available and fall back to the CPU otherwise, so the notebook no longer
# fails on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move the model to the selected device. As the last expression of the cell,
# the returned module is also displayed.
model.to(device)



UNetModel(
  (ups): ModuleList(
    (0): ConvTranspose2d(1024, 512, kernel_size=(2, 2), stride=(2, 2))
    (1): DoubleConv(
      (conv): Sequential(
        (0): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
      )
    )
    (2): ConvTranspose2d(512, 256, kernel_size=(2, 2), stride=(2, 2))
    (3): DoubleConv(
      (conv): Sequential(
        (0): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 

In [8]:
# Optimiser hyper-parameters.
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5

optimizer = optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# Multiply the learning rate by 0.1 once the monitored value (the validation
# loss passed to `scheduler.step` in the training loop) stops decreasing.
# NOTE(review): patience=10 equals the number of epochs run in the training
# cell (EPOCH = 10), so a reduction is unlikely to trigger in this run -
# confirm this is intended.
# NOTE(review): `verbose` is reported as deprecated in newer PyTorch
# releases - confirm against the installed version.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=10,
    verbose=True,
)

# Training objective (project-defined CombinedLoss with default arguments).
criterion = CombinedLoss()

In [9]:
# --- Training loop ---
EPOCH = 10
# Metric accumulator for the validation phase (the threshold value can be tuned).
metrics_calculator = SegmentationMetrics(threshold=0.5)

best_val_metric = 0.0  # use float('inf') instead when tracking the lowest loss
best_model_dir = "output"


def _unpack_batch(batch):
    """Move one batch to ``device`` and return ``(inputs, masks)``.

    ``inputs`` is the ``"cam"`` tensor and the ``"point_cloud"`` tensor
    concatenated along dim 1 (expected to give 6 channels); ``masks`` is the
    ``"masked"`` entry of the batch.
    """
    cam = batch["cam"].to(device)
    pointcloud = batch["point_cloud"].to(device)
    stacked = torch.cat((cam, pointcloud), dim=1)
    return stacked, batch["masked"].to(device)


for epoch in range(EPOCH):
    # --- Training phase ---
    model.train()
    train_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCH} - Training", leave=False)
    for batch in train_bar:
        inputs, masks = _unpack_batch(batch)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss
        train_bar.set_postfix(loss=batch_loss)

    # Mean of the per-batch losses.
    train_loss /= len(train_loader)

    # --- Validation phase ---
    model.eval()
    val_loss = 0.0
    metrics_calculator.reset()  # start every epoch with cleared metrics

    val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCH} - Validation", leave=False)
    with torch.no_grad():
        for batch in val_bar:
            inputs, masks = _unpack_batch(batch)

            outputs = model(inputs)
            loss = criterion(outputs, masks)  # validation loss for this batch
            batch_loss = loss.item()
            val_loss += batch_loss

            # Feed the model outputs (logits) and the targets to the metrics.
            metrics_calculator.update(outputs, masks)
            val_bar.set_postfix(loss=batch_loss)

    val_loss /= len(val_loader)
    epoch_metrics = metrics_calculator.compute()  # metrics for the whole epoch

    print(f"Epoch {epoch+1}/{EPOCH} -> Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    print(f"Val Metrics -> IoU: {epoch_metrics['IoU']:.4f}, Dice: {epoch_metrics['Dice']:.4f}, "
          f"Precision: {epoch_metrics['Precision']:.4f}, Recall: {epoch_metrics['Recall']:.4f}, "
          f"F1: {epoch_metrics['F1']:.4f}, PixelAcc: {epoch_metrics['PixelAcc']:.4f}")

    # ReduceLROnPlateau is driven by the validation loss. An epoch-based
    # scheduler such as CosineAnnealingLR would be stepped without an argument.
    scheduler.step(val_loss)

    # Keep a checkpoint whenever the validation IoU improves on the best so far.
    current_iou = epoch_metrics['IoU']
    if current_iou > best_val_metric:
        best_val_metric = current_iou
        model.save_pretrained(os.path.join(best_model_dir, f"lidarseg_unet_epoch_{epoch+1}"))

                                                                                       

Epoch 1/10 -> Train Loss: 0.1394, Val Loss: 0.0817
Val Metrics -> IoU: 0.7678, Dice: 0.8645, Precision: 0.8526, Recall: 0.8799, F1: 0.8645, PixelAcc: 0.8486


                                                                                       

Epoch 2/10 -> Train Loss: 0.0772, Val Loss: 0.0776
Val Metrics -> IoU: 0.7681, Dice: 0.8642, Precision: 0.8661, Recall: 0.8653, F1: 0.8642, PixelAcc: 0.8494


                                                                                       

Epoch 3/10 -> Train Loss: 0.0702, Val Loss: 0.0696
Val Metrics -> IoU: 0.7830, Dice: 0.8756, Precision: 0.8714, Recall: 0.8819, F1: 0.8756, PixelAcc: 0.8509


                                                                                       

Epoch 4/10 -> Train Loss: 0.0669, Val Loss: 0.0686
Val Metrics -> IoU: 0.7865, Dice: 0.8779, Precision: 0.8684, Recall: 0.8892, F1: 0.8779, PixelAcc: 0.8509


                                                                                       

Epoch 5/10 -> Train Loss: 0.0647, Val Loss: 0.0657
Val Metrics -> IoU: 0.7922, Dice: 0.8821, Precision: 0.8740, Recall: 0.8916, F1: 0.8821, PixelAcc: 0.8515


                                                                                       

Epoch 6/10 -> Train Loss: 0.0628, Val Loss: 0.0631
Val Metrics -> IoU: 0.7960, Dice: 0.8846, Precision: 0.8839, Recall: 0.8868, F1: 0.8846, PixelAcc: 0.8520


                                                                                       

Epoch 7/10 -> Train Loss: 0.0613, Val Loss: 0.0617
Val Metrics -> IoU: 0.8009, Dice: 0.8880, Precision: 0.8810, Recall: 0.8960, F1: 0.8880, PixelAcc: 0.8521


                                                                                       

Epoch 8/10 -> Train Loss: 0.0602, Val Loss: 0.0610
Val Metrics -> IoU: 0.8022, Dice: 0.8889, Precision: 0.8819, Recall: 0.8968, F1: 0.8889, PixelAcc: 0.8523


                                                                                       

Epoch 9/10 -> Train Loss: 0.0589, Val Loss: 0.0618
Val Metrics -> IoU: 0.8005, Dice: 0.8878, Precision: 0.8786, Recall: 0.8980, F1: 0.8878, PixelAcc: 0.8523


                                                                                        

Epoch 10/10 -> Train Loss: 0.0583, Val Loss: 0.0596
Val Metrics -> IoU: 0.8048, Dice: 0.8906, Precision: 0.8865, Recall: 0.8955, F1: 0.8906, PixelAcc: 0.8525


In [11]:
# Final export location: <project root>/models/lidarseg_unet-aug
model_save_dir = os.path.join(root_dir, 'models')

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# replacing the racy "check, then create" sequence.
os.makedirs(model_save_dir, exist_ok=True)

model_save_path = os.path.join(model_save_dir, 'lidarseg_unet-aug')

# Saves the in-memory model as it is when this cell runs, i.e. the weights
# left by the training cell - not necessarily the best-IoU checkpoint that
# the training loop wrote under `best_model_dir`.
model.save_pretrained(model_save_path)