# Train

TODO: Convert this notebook into a standalone `train.py` script.

In [1]:
import os
from pathlib import Path
import math
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from data_loader.cityscapes import CityscapesDataLoader
from model.deeplabv3plus import DeepLabv3Plus
from torch_poly_lr_decay import PolynomialLRDecay
from model.metric import SegmentationMetrics

In [2]:
def save_checkpoint(epoch, model, optimizer, path):
    """Serialize a training checkpoint to ``path`` using ``torch.save``.

    The checkpoint is a dict with three entries:

    * ``"epoch"`` -- the ``epoch`` value passed in,
    * ``"model_state_dict"`` -- ``model.state_dict()``,
    * ``"optimizer_state_dict"`` -- ``optimizer.state_dict()``.

    Nothing else (for example a learning-rate scheduler) is stored.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    )
    torch.save(checkpoint, path)


def train_one_epoch(epoch, model, criterion, optimizer, scheduler, data_loader, device):
    """Run one optimization pass over ``data_loader`` and return the mean batch loss.

    Every batch is indexed with ``"image"`` and ``"mask"``; both entries are
    moved to ``device``. The optimizer and then the scheduler are stepped once
    per batch, so the learning-rate schedule advances per iteration rather
    than per epoch.

    Losses are logged through the module-level ``writer``:
    ``train_iter_loss`` at global step ``i + len(data_loader) * epoch`` for
    batch ``i``, and ``train_epoch_avg_loss`` at step ``epoch``.

    Returns:
        The sum of the per-batch losses divided by ``len(data_loader)``.
    """
    model.train()

    steps_per_epoch = len(data_loader)
    # Global step of this epoch's first batch, used for per-iteration logging.
    first_step = steps_per_epoch * epoch
    running_loss = 0
    for step, batch in enumerate(tqdm(data_loader)):
        images, masks = batch["image"].to(device), batch["mask"].to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), masks)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Read the scalar once and reuse it for both the sum and the log.
        batch_loss = loss.item()
        running_loss += batch_loss
        writer.add_scalar("train_iter_loss", batch_loss, step + first_step)

    epoch_loss = running_loss / steps_per_epoch
    writer.add_scalar("train_epoch_avg_loss", epoch_loss, epoch)
    return epoch_loss


def validate(epoch, model, criterion, data_loader, num_classes, device):
    """Evaluate ``model`` on ``data_loader`` and log the loss and mIoU.

    The model is put in eval mode and gradients are disabled for the pass.
    Every batch is indexed with ``"image"`` and ``"mask"``; both entries are
    moved to ``device``. Predicted classes are the argmax of the model output
    over dimension 1 and are accumulated in a ``SegmentationMetrics`` object.

    The model is switched back to training mode afterwards, including when
    the pass is interrupted or raises, so an aborted validation does not
    leave it in eval mode.

    Relies on two module-level names: ``ignore_idx`` (passed to
    ``SegmentationMetrics``) and ``writer`` (receives ``val_epoch_avg_loss``
    and ``val_epoch_mIoU`` at step ``epoch``).

    Returns:
        A tuple ``(avg_loss, ious, mIoU)`` where ``avg_loss`` is the summed
        batch loss divided by ``len(data_loader)`` and ``ious, mIoU`` are the
        two values returned by ``metrics.iou()``.
    """
    model.eval()

    total_loss = 0
    n_batches = len(data_loader)
    metrics = SegmentationMetrics(num_classes=num_classes, ignore_idx=ignore_idx)
    try:
        with torch.no_grad():
            for sample in tqdm(data_loader):
                images = sample["image"].to(device)
                masks = sample["mask"].to(device)

                pred = model(images)
                loss = criterion(pred, masks)
                total_loss += loss.item()

                # Collapse per-class scores (dim 1) into a predicted class map.
                pred_cls = torch.argmax(pred, dim=1)
                metrics.update(pred_cls, masks)
    finally:
        # Restore training mode even if the loop above was interrupted
        # (e.g. KeyboardInterrupt in the notebook) or raised.
        model.train()

    avg_loss = total_loss / n_batches
    ious, mIoU = metrics.iou()
    writer.add_scalar("val_epoch_avg_loss", avg_loss, epoch)
    writer.add_scalar("val_epoch_mIoU", mIoU, epoch)
    return avg_loss, ious, mIoU

## Checkpoints and Tensorboard

In [3]:
# Name of this run; it selects the log and checkpoint sub-directories.
run_dir = "run3"

# Resolve both output locations up front.
log_dir = os.path.join("./experiments", "runs", run_dir)
checkpoint_dir = os.path.join("./experiments", "checkpoints", run_dir)
checkpoint_file = os.path.join(checkpoint_dir, "best_model.pt")

# Create the directories (and any missing parents); existing ones are fine.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)

# TensorBoard writer used by the training and validation functions.
writer = SummaryWriter(log_dir=log_dir)

print(f"Writing tensorboard logs to {log_dir}")
print(f"Saving model checkpoints to {checkpoint_file}")

Writing tensorboard logs to ./experiments/runs/run3
Saving model checkpoints to ./experiments/checkpoints/run3/best_model.pt


## Get DataLoader

In [4]:
# Number of samples per batch handed to the loader.
batch_size = 4

# Root directories for the input images and their masks.
img_root, mask_root = "./data/leftImg8bit", "./data/gtFine"

# Project loader wrapper; later cells read its train_set, train_loader and val_loader.
cityscapes = CityscapesDataLoader(img_root, mask_root, batch_size)

## Initialize Model, Loss, and Optimizer

In [5]:
# Optimization hyper-parameters: SGD with a polynomial learning-rate decay.
train_iterations = 90000
initial_lr, end_lr = 0.007, 0.001
momentum, weight_decay = 0.9, 0.0005

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Settings read from the training dataset.
num_classes = cityscapes.train_set.num_classes
ignore_idx = cityscapes.train_set.ignoreId

# The budget is given in iterations; round up to a whole number of epochs.
total_epochs = math.ceil(train_iterations / len(cityscapes.train_loader))

model = DeepLabv3Plus(num_classes).to(device)
# Targets equal to ignore_idx are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=ignore_idx)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=initial_lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
# The scheduler is stepped once per batch in train_one_epoch, so its horizon
# is expressed in iterations.
scheduler = PolynomialLRDecay(
    optimizer,
    max_decay_steps=train_iterations,
    end_learning_rate=end_lr,
    power=0.9,
)

In [6]:
# Best validation mIoU seen so far and the epoch that produced it.
best_mIoU, best_epoch = 0, 0
# patience = 15  # only needed by the disabled early-stopping branch below
for epoch in range(total_epochs):
    train_loss = train_one_epoch(
        epoch=epoch,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        data_loader=cityscapes.train_loader,
        device=device,
    )
    val_loss, ious, mIoU = validate(
        epoch=epoch,
        model=model,
        criterion=criterion,
        data_loader=cityscapes.val_loader,
        num_classes=num_classes,
        device=device,
    )
    print(f"Epoch {epoch}: val_loss {val_loss:.4f} | mIoU {mIoU: .4f}")

    if not mIoU > best_mIoU:
        # No improvement this epoch. Early stopping is currently disabled;
        # to enable it, define `patience` above and uncomment:
        # if epoch - best_epoch >= patience:
        #     print(f"Early Stopping at epoch {epoch}")
        #     break
        continue

    # New best model: remember it and overwrite the single checkpoint file.
    best_mIoU, best_epoch = mIoU, epoch
    save_checkpoint(epoch, model, optimizer, checkpoint_file)

100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 0: val_loss 0.8569 | mIoU  0.1491


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 1: val_loss 0.7237 | mIoU  0.1772


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]


Epoch 2: val_loss 0.4957 | mIoU  0.2335


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 3: val_loss 0.5224 | mIoU  0.2282


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 4: val_loss 0.4516 | mIoU  0.2552


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 5: val_loss 0.4648 | mIoU  0.2384


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 6: val_loss 0.4436 | mIoU  0.2647


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 7: val_loss 0.4902 | mIoU  0.2427


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 8: val_loss 0.4372 | mIoU  0.2632


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 9: val_loss 0.3851 | mIoU  0.2859


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 10: val_loss 0.3577 | mIoU  0.3028


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 11: val_loss 0.3708 | mIoU  0.3060


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 12: val_loss 0.3752 | mIoU  0.3088


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 13: val_loss 0.3262 | mIoU  0.3345


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 14: val_loss 0.3687 | mIoU  0.3103


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 15: val_loss 0.5430 | mIoU  0.2587


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 16: val_loss 0.3204 | mIoU  0.3262


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 17: val_loss 0.3051 | mIoU  0.3477


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 18: val_loss 0.3179 | mIoU  0.3445


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 19: val_loss 0.3470 | mIoU  0.3291


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 20: val_loss 0.3036 | mIoU  0.3456


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 21: val_loss 0.3135 | mIoU  0.3554


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 22: val_loss 0.3059 | mIoU  0.3511


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 23: val_loss 0.3059 | mIoU  0.3454


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 24: val_loss 0.2999 | mIoU  0.3625


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 25: val_loss 0.4826 | mIoU  0.3067


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 26: val_loss 0.2900 | mIoU  0.3671


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 27: val_loss 0.3575 | mIoU  0.3418


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]


Epoch 28: val_loss 0.2805 | mIoU  0.3806


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 29: val_loss 0.2810 | mIoU  0.3736


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 30: val_loss 0.2939 | mIoU  0.3706


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 31: val_loss 0.2786 | mIoU  0.3831


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 32: val_loss 0.2654 | mIoU  0.3969


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 33: val_loss 0.2759 | mIoU  0.3830


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 34: val_loss 0.2759 | mIoU  0.3990


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 35: val_loss 0.2629 | mIoU  0.3971


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 36: val_loss 0.4762 | mIoU  0.3245


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 37: val_loss 0.2681 | mIoU  0.3890


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 38: val_loss 0.2719 | mIoU  0.3850


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 39: val_loss 0.2938 | mIoU  0.3789


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 40: val_loss 0.3276 | mIoU  0.3580


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 41: val_loss 0.2451 | mIoU  0.4133


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 42: val_loss 0.7019 | mIoU  0.3030


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 43: val_loss 0.2576 | mIoU  0.4149


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 44: val_loss 0.2559 | mIoU  0.4163


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 45: val_loss 0.2920 | mIoU  0.3926


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 46: val_loss 0.2478 | mIoU  0.4170


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 47: val_loss 0.2798 | mIoU  0.3997


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 48: val_loss 0.2527 | mIoU  0.4186


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 49: val_loss 0.3020 | mIoU  0.4100


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]


Epoch 50: val_loss 0.2514 | mIoU  0.4196


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 51: val_loss 1.2316 | mIoU  0.3240


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 52: val_loss 0.2410 | mIoU  0.4387


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 53: val_loss 0.2556 | mIoU  0.4298


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 54: val_loss 0.3690 | mIoU  0.3694


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 55: val_loss 0.2577 | mIoU  0.4174


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 56: val_loss 0.2412 | mIoU  0.4386


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 57: val_loss 0.2429 | mIoU  0.4268


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 58: val_loss 0.2600 | mIoU  0.4250


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 59: val_loss 0.2548 | mIoU  0.4232


100%|██████████| 744/744 [08:20<00:00,  1.49it/s]
100%|██████████| 125/125 [01:42<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 60: val_loss 0.2575 | mIoU  0.4189


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 61: val_loss 0.2307 | mIoU  0.4439


100%|██████████| 744/744 [08:21<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 62: val_loss 0.2453 | mIoU  0.4297


100%|██████████| 744/744 [08:20<00:00,  1.49it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 63: val_loss 0.2435 | mIoU  0.4360


100%|██████████| 744/744 [08:20<00:00,  1.49it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 64: val_loss 0.2216 | mIoU  0.4545


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 65: val_loss 0.2729 | mIoU  0.4328


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 66: val_loss 0.2504 | mIoU  0.4249


100%|██████████| 744/744 [08:28<00:00,  1.46it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 67: val_loss 0.2663 | mIoU  0.4354


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 68: val_loss 0.2273 | mIoU  0.4525


100%|██████████| 744/744 [08:27<00:00,  1.46it/s]
100%|██████████| 125/125 [01:45<00:00,  1.18it/s]


Epoch 69: val_loss 0.2343 | mIoU  0.4625


100%|██████████| 744/744 [08:27<00:00,  1.47it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 70: val_loss 0.2169 | mIoU  0.4610


100%|██████████| 744/744 [08:32<00:00,  1.45it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 71: val_loss 0.2346 | mIoU  0.4437


100%|██████████| 744/744 [08:30<00:00,  1.46it/s]
100%|██████████| 125/125 [01:45<00:00,  1.18it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 72: val_loss 0.2455 | mIoU  0.4439


100%|██████████| 744/744 [08:27<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 73: val_loss 0.2239 | mIoU  0.4637


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 74: val_loss 0.2178 | mIoU  0.4593


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 75: val_loss 0.2238 | mIoU  0.4709


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 76: val_loss 0.2245 | mIoU  0.4824


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 77: val_loss 0.2385 | mIoU  0.4397


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 78: val_loss 0.2386 | mIoU  0.4497


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 79: val_loss 0.2272 | mIoU  0.4508


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 80: val_loss 0.2335 | mIoU  0.4502


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 81: val_loss 0.2198 | mIoU  0.4788


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 82: val_loss 0.2211 | mIoU  0.4638


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 83: val_loss 0.2149 | mIoU  0.4780


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 84: val_loss 0.2030 | mIoU  0.4911


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 85: val_loss 0.2058 | mIoU  0.4960


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 86: val_loss 0.2311 | mIoU  0.4695


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 87: val_loss 0.2063 | mIoU  0.4866


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 88: val_loss 0.2924 | mIoU  0.4210


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 89: val_loss 0.2137 | mIoU  0.4768


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 90: val_loss 0.2081 | mIoU  0.4869


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 91: val_loss 0.2143 | mIoU  0.4799


100%|██████████| 744/744 [08:27<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 92: val_loss 0.1946 | mIoU  0.5054


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 93: val_loss 1.7240 | mIoU  0.3238


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 94: val_loss 0.2586 | mIoU  0.4310


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 95: val_loss 0.2475 | mIoU  0.4462


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 96: val_loss 0.2148 | mIoU  0.4864


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 97: val_loss 3.1287 | mIoU  0.2834


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 98: val_loss 0.1885 | mIoU  0.5093


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 99: val_loss 0.2072 | mIoU  0.4937


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 100: val_loss 0.2095 | mIoU  0.5004


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 101: val_loss 0.1939 | mIoU  0.4967


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 102: val_loss 0.1897 | mIoU  0.5128


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 103: val_loss 0.2068 | mIoU  0.4830


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 104: val_loss 0.1849 | mIoU  0.5199


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 105: val_loss 0.1946 | mIoU  0.5011


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 106: val_loss 0.2009 | mIoU  0.4981


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 107: val_loss 0.1918 | mIoU  0.5066


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 108: val_loss 0.2023 | mIoU  0.4847


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 109: val_loss 0.1883 | mIoU  0.5134


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 110: val_loss 0.1919 | mIoU  0.5127


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 111: val_loss 0.1838 | mIoU  0.5267


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 112: val_loss 0.2302 | mIoU  0.4683


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 113: val_loss 0.1844 | mIoU  0.5232


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 114: val_loss 0.1849 | mIoU  0.5285


100%|██████████| 744/744 [08:27<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]


Epoch 115: val_loss 0.1766 | mIoU  0.5312


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 116: val_loss 0.1795 | mIoU  0.5217


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 117: val_loss 0.1740 | mIoU  0.5298


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 118: val_loss 0.1776 | mIoU  0.5254


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Epoch 119: val_loss 0.1738 | mIoU  0.5382


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]

Epoch 120: val_loss 0.1708 | mIoU  0.5345





In [7]:
# Close the TensorBoard writer once the training loop above has finished.
writer.close()

In [12]:
# Re-create the writer on the same log directory: the earlier one was closed,
# and train_one_epoch / validate log through this module-level name.
writer = SummaryWriter(log_dir=log_dir)

In [13]:
# Show the planned epoch count and the last epoch index reached, one per line.
print(total_epochs, epoch, sep="\n")

# Resume training: run epochs prev_epochs + 1 through prev_epochs + 80,
# reusing the model, optimizer, scheduler and best-mIoU state from above.
prev_epochs = 120
for epoch in range(prev_epochs + 1, prev_epochs + 81):
    train_loss = train_one_epoch(
        epoch=epoch,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        data_loader=cityscapes.train_loader,
        device=device,
    )
    val_loss, ious, mIoU = validate(
        epoch=epoch,
        model=model,
        criterion=criterion,
        data_loader=cityscapes.val_loader,
        num_classes=num_classes,
        device=device,
    )
    print(f"Epoch {epoch}: val_loss {val_loss:.4f} | mIoU {mIoU: .4f}")

    if not mIoU > best_mIoU:
        continue

    # New best model: remember it and overwrite the single checkpoint file.
    best_mIoU, best_epoch = mIoU, epoch
    save_checkpoint(epoch, model, optimizer, checkpoint_file)

  0%|          | 0/744 [00:00<?, ?it/s]

121
120


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 121: val_loss 0.1759 | mIoU  0.5225


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 122: val_loss 0.1726 | mIoU  0.5411


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 123: val_loss 0.2774 | mIoU  0.4625


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:45<00:00,  1.18it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 124: val_loss 0.1737 | mIoU  0.5364


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 125: val_loss 0.1763 | mIoU  0.5259


100%|██████████| 744/744 [08:26<00:00,  1.47it/s]
100%|██████████| 125/125 [01:46<00:00,  1.18it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 126: val_loss 0.1780 | mIoU  0.5292


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 127: val_loss 0.1735 | mIoU  0.5395


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 128: val_loss 0.2162 | mIoU  0.5077


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 129: val_loss 0.1788 | mIoU  0.5489


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 130: val_loss 0.1744 | mIoU  0.5396


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 131: val_loss 0.1751 | mIoU  0.5434


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 132: val_loss 0.1734 | mIoU  0.5306


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 133: val_loss 0.1720 | mIoU  0.5372


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 134: val_loss 0.1869 | mIoU  0.5188


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 135: val_loss 0.2197 | mIoU  0.5188


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 136: val_loss 0.1713 | mIoU  0.5277


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 137: val_loss 0.1727 | mIoU  0.5414


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 138: val_loss 0.1764 | mIoU  0.5276


100%|██████████| 744/744 [08:22<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]


Epoch 139: val_loss 0.1675 | mIoU  0.5576


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:42<00:00,  1.22it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 140: val_loss 0.1718 | mIoU  0.5278


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 141: val_loss 0.1760 | mIoU  0.5449


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 142: val_loss 0.1774 | mIoU  0.5361


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Epoch 143: val_loss 0.1696 | mIoU  0.5585


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 144: val_loss 0.1752 | mIoU  0.5531


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 145: val_loss 0.1778 | mIoU  0.5323


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 146: val_loss 0.1736 | mIoU  0.5390


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 147: val_loss 0.1688 | mIoU  0.5485


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 148: val_loss 0.1750 | mIoU  0.5389


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 149: val_loss 0.1711 | mIoU  0.5475


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 150: val_loss 0.1661 | mIoU  0.5501


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 151: val_loss 0.1657 | mIoU  0.5558


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 152: val_loss 0.1752 | mIoU  0.5325


100%|██████████| 744/744 [08:24<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 153: val_loss 0.1665 | mIoU  0.5478


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.20it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 154: val_loss 0.1776 | mIoU  0.5253


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 155: val_loss 0.1833 | mIoU  0.5236


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 156: val_loss 0.1759 | mIoU  0.5249


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 157: val_loss 0.1706 | mIoU  0.5350


100%|██████████| 744/744 [08:23<00:00,  1.48it/s]
100%|██████████| 125/125 [01:43<00:00,  1.21it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 158: val_loss 0.1678 | mIoU  0.5462


100%|██████████| 744/744 [08:24<00:00,  1.47it/s]
100%|██████████| 125/125 [01:45<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 159: val_loss 0.1708 | mIoU  0.5321


100%|██████████| 744/744 [08:25<00:00,  1.47it/s]
100%|██████████| 125/125 [01:44<00:00,  1.19it/s]
  0%|          | 0/744 [00:00<?, ?it/s]

Epoch 160: val_loss 0.1666 | mIoU  0.5481


  0%|          | 2/744 [00:01<08:46,  1.41it/s]


KeyboardInterrupt: 