# Packages

In [1]:
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F

from modules import conv3x3, conv1x1, DWConvBNAct, PWConvBNAct, ConvBNAct, Activation, SegHead
from model_registry import register_model, aux_models

# Python 모듈 경로에 'models' 폴더 추가
models_path = "/content/drive/MyDrive/Colab Notebooks/AIFFELthon/bisenetv2_zh320"
# utils_path = "/aiffel/aiffel/aiffel project model training/Fast-SCNN-pytorch-master/utils"

if models_path not in sys.path:
    sys.path.append(models_path)  # models 폴더 추가

# sys.path 확인 (제대로 추가되었는지)
print("Python 경로 목록:")
print("\n".join(sys.path))

train_img_path = "/home/segmentsafestep/Fast-SCNN-pytorch-master/datasets/citys/leftImg8bit/train"
dataset_root = "/home/segmentsafestep/Fast-SCNN-pytorch-master/datasets"

Python 경로 목록:
/home/segmentsafestep/miniconda3/envs/jupyter-env/lib/python39.zip
/home/segmentsafestep/miniconda3/envs/jupyter-env/lib/python3.9
/home/segmentsafestep/miniconda3/envs/jupyter-env/lib/python3.9/lib-dynload

/home/segmentsafestep/miniconda3/envs/jupyter-env/lib/python3.9/site-packages
/home/segmentsafestep/miniconda3/envs/jupyter-env/lib/python3.9/site-packages/IPython/extensions
/home/segmentsafestep/.ipython
/content/drive/MyDrive/Colab Notebooks/AIFFELthon/bisenetv2_zh320


# Model definition

In [2]:
@register_model(aux_models)
class BiSeNetv2(nn.Module):
    def __init__(self, num_class=1, n_channel=3, act_type='relu', use_aux=True):
        super().__init__()
        self.use_aux = use_aux
        self.detail_branch = DetailBranch(n_channel, 128, act_type)
        self.semantic_branch = SemanticBranch(n_channel, 128, num_class, act_type, use_aux)
        self.bga_layer = BilateralGuidedAggregationLayer(128, 128, act_type)
        self.seg_head = SegHead(128, num_class, act_type)

    def forward(self, x, is_training=False):
        size = x.size()[2:]
        x_d = self.detail_branch(x)
        if self.use_aux:
            x_s, aux2, aux3, aux4, aux5 = self.semantic_branch(x)
        else:
            x_s = self.semantic_branch(x)
        x = self.bga_layer(x_d, x_s)
        x = self.seg_head(x)
        x = F.interpolate(x, size, mode='bilinear', align_corners=True)

        if self.use_aux and is_training:
            return x, (aux2, aux3, aux4, aux5)
        else:
            return x

class DetailBranch(nn.Sequential):
    def __init__(self, in_channels, out_channels, act_type='relu'):
        super().__init__(
            ConvBNAct(in_channels, 64, 3, 2, act_type=act_type),
            ConvBNAct(64, 64, 3, 1, act_type=act_type),
            ConvBNAct(64, 64, 3, 2, act_type=act_type),
            ConvBNAct(64, 64, 3, 1, act_type=act_type),
            ConvBNAct(64, 128, 3, 1, act_type=act_type),
            ConvBNAct(128, 128, 3, 2, act_type=act_type),
            ConvBNAct(128, 128, 3, 1, act_type=act_type),
            ConvBNAct(128, out_channels, 3, 1, act_type=act_type)
        )

class SemanticBranch(nn.Sequential):
    def __init__(self, in_channels, out_channels, num_class, act_type='relu', use_aux=False):
        super().__init__()
        self.use_aux = use_aux
        self.stage1to2 = StemBlock(in_channels, 16, act_type)
        self.stage3 = nn.Sequential(
                            GatherExpansionLayer(16, 32, 2, act_type),
                            GatherExpansionLayer(32, 32, 1, act_type),
                        )
        self.stage4 = nn.Sequential(
                            GatherExpansionLayer(32, 64, 2, act_type),
                            GatherExpansionLayer(64, 64, 1, act_type),
                        )
        self.stage5_1to4 = nn.Sequential(
                                GatherExpansionLayer(64, 128, 2, act_type),
                                GatherExpansionLayer(128, 128, 1, act_type),
                                GatherExpansionLayer(128, 128, 1, act_type),
                                GatherExpansionLayer(128, 128, 1, act_type),
                            )
        self.stage5_5 = ContextEmbeddingBlock(128, out_channels, act_type)

        if self.use_aux:
            self.seg_head2 = SegHead(16, num_class, act_type)
            self.seg_head3 = SegHead(32, num_class, act_type)
            self.seg_head4 = SegHead(64, num_class, act_type)
            self.seg_head5 = SegHead(128, num_class, act_type)

    def forward(self, x):
        x = self.stage1to2(x)
        if self.use_aux:
            aux2 = self.seg_head2(x)

        x = self.stage3(x)
        if self.use_aux:
            aux3 = self.seg_head3(x)

        x = self.stage4(x)
        if self.use_aux:
            aux4 = self.seg_head4(x)

        x = self.stage5_1to4(x)
        if self.use_aux:
            aux5 = self.seg_head5(x)

        x = self.stage5_5(x)

        if self.use_aux:
            return x, aux2, aux3, aux4, aux5
        else:
            return x

class StemBlock(nn.Module):
    def __init__(self, in_channels, out_channels, act_type='relu'):
        super().__init__()
        self.conv_init = ConvBNAct(in_channels, out_channels, 3, 2, act_type=act_type)
        self.left_branch = nn.Sequential(
                            ConvBNAct(out_channels, out_channels//2, 1, act_type=act_type),
                            ConvBNAct(out_channels//2, out_channels, 3, 2, act_type=act_type)
                    )
        self.right_branch = nn.MaxPool2d(3, 2, 1)
        self.conv_last = ConvBNAct(out_channels*2, out_channels, 3, 1, act_type=act_type)

    def forward(self, x):
        x = self.conv_init(x)
        x_left = self.left_branch(x)
        x_right = self.right_branch(x)
        x = torch.cat([x_left, x_right], dim=1)
        x = self.conv_last(x)

        return x

class GatherExpansionLayer(nn.Module):
    def __init__(self, in_channels, out_channels, stride, act_type='relu', expand_ratio=6,):
        super().__init__()
        self.stride = stride
        hid_channels = int(round(in_channels * expand_ratio))

        layers = [ConvBNAct(in_channels, in_channels, 3, act_type=act_type)]

        if stride == 2:
            layers.extend([
                            DWConvBNAct(in_channels, hid_channels, 3, 2, act_type='none'),
                            DWConvBNAct(hid_channels, hid_channels, 3, 1, act_type='none')
                        ])
            self.right_branch = nn.Sequential(
                                    DWConvBNAct(in_channels, in_channels, 3, 2, act_type='none'),
                                    PWConvBNAct(in_channels, out_channels, act_type='none')
                            )
        else:
            layers.append(DWConvBNAct(in_channels, hid_channels, 3, 1, act_type='none'))

        layers.append(PWConvBNAct(hid_channels, out_channels, act_type='none'))
        self.left_branch = nn.Sequential(*layers)
        self.act = Activation(act_type)

    def forward(self, x):
        res = self.left_branch(x)

        if self.stride == 2:
            res = self.right_branch(x) + res
        else:
            res = x + res

        return self.act(res)

class ContextEmbeddingBlock(nn.Module):
    def __init__(self, in_channels, out_channels, act_type='relu'):
        super().__init__()
        self.pool = nn.Sequential(
                            nn.AdaptiveAvgPool2d(1),
                            nn.BatchNorm2d(in_channels)
                    )
        self.conv_mid = ConvBNAct(in_channels, in_channels, 1, act_type=act_type)
        self.conv_last = conv3x3(in_channels, out_channels)

    def forward(self, x):
        res = self.pool(x)
        res = self.conv_mid(res)
        x = res + x
        x = self.conv_last(x)

        return x

class BilateralGuidedAggregationLayer(nn.Module):
    def __init__(self, in_channels, out_channels, act_type='relu'):
        super().__init__()
        self.detail_high = nn.Sequential(
                                    DWConvBNAct(in_channels, in_channels, 3, act_type=act_type),
                                    conv1x1(in_channels, in_channels)
                        )
        self.detail_low = nn.Sequential(
                                    DWConvBNAct(in_channels, in_channels, 3, 2, act_type=act_type),
                                    nn.AvgPool2d(3, 2, 1)
                        )
        self.semantic_high = nn.Sequential(
                                    ConvBNAct(in_channels, in_channels, 3, act_type=act_type),
                                    nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
                                    nn.Sigmoid()
                            )
        self.semantic_low = nn.Sequential(
                                    DWConvBNAct(in_channels, in_channels, 3, act_type=act_type),
                                    conv1x1(in_channels, in_channels),
                                    nn.Sigmoid()
                            )
        self.conv_last = ConvBNAct(in_channels, out_channels, 3, act_type=act_type)

    def forward(self, x_d, x_s):
        x_d_high = self.detail_high(x_d)
        x_d_low = self.detail_low(x_d)

        x_s_high = self.semantic_high(x_s)
        x_s_low = self.semantic_low(x_s)
        x_high = x_d_high * x_s_high
        x_low = x_d_low * x_s_low

        size = x_high.size()[2:]
        x_low = F.interpolate(x_low, size, mode='bilinear', align_corners=True)
        res = x_high + x_low
        res = self.conv_last(res)

        return res

In [3]:
model = BiSeNetv2(num_class=19)

# Load checkpoint

In [4]:
chk_pnt_path = "/home/segmentsafestep/Fast-SCNN-pytorch-master/weights/bisenetv2-aux.pth"
chk_pnt_map = torch.load(chk_pnt_path, map_location='cpu')

load_res = model.load_state_dict(chk_pnt_map["state_dict"], strict=False)

# 가중치가 잘 로드 되었는지 확인
load_res.missing_keys, load_res.unexpected_keys

# 파라미터 개수, 레이어 확인
tot_num_param = 0
for layer_name, params in model.named_parameters():
    print(layer_name,'---', params.shape)
    n_p = 1
    for p in params.shape:
        n_p *= p
    tot_num_param += n_p

print("tot_num_param: ", tot_num_param)

detail_branch.0.0.weight --- torch.Size([64, 3, 3, 3])
detail_branch.0.1.weight --- torch.Size([64])
detail_branch.0.1.bias --- torch.Size([64])
detail_branch.1.0.weight --- torch.Size([64, 64, 3, 3])
detail_branch.1.1.weight --- torch.Size([64])
detail_branch.1.1.bias --- torch.Size([64])
detail_branch.2.0.weight --- torch.Size([64, 64, 3, 3])
detail_branch.2.1.weight --- torch.Size([64])
detail_branch.2.1.bias --- torch.Size([64])
detail_branch.3.0.weight --- torch.Size([64, 64, 3, 3])
detail_branch.3.1.weight --- torch.Size([64])
detail_branch.3.1.bias --- torch.Size([64])
detail_branch.4.0.weight --- torch.Size([128, 64, 3, 3])
detail_branch.4.1.weight --- torch.Size([128])
detail_branch.4.1.bias --- torch.Size([128])
detail_branch.5.0.weight --- torch.Size([128, 128, 3, 3])
detail_branch.5.1.weight --- torch.Size([128])
detail_branch.5.1.bias --- torch.Size([128])
detail_branch.6.0.weight --- torch.Size([128, 128, 3, 3])
detail_branch.6.1.weight --- torch.Size([128])
detail_branch

# Evaluate

In [5]:
# 기본 라이브러리
import os
import time
import shutil

# PyTorch 관련 라이브러리
import torch.utils.data as data
import torch.backends.cudnn as cudnn

# 이미지 변환 관련 라이브러리
from torchvision import transforms

# 데이터 로더 및 모델, 유틸리티 함수 불러오기
from data_loader import get_segmentation_dataset  # 데이터셋 불러오는 함수
from utils.loss import MixSoftmaxCrossEntropyLoss, MixSoftmaxCrossEntropyOHEMLoss  # 손실 함수
from utils.lr_scheduler import LRScheduler  # 학습률 스케줄러
from utils.metric import SegmentationMetric  # 평가 지표

In [6]:
# 모델 및 학습 관련 하이퍼파라미터 설정
args = {
    "model": "BiSeNetv2",  # 사용할 모델 이름
    "dataset": "citys",  # 학습할 데이터셋 (예: Cityscapes)
    "base_size": 1024,  # 입력 이미지 기본 크기
    "crop_size": 768,  # 학습 시 사용할 크롭 크기
    "train_split": "train",  # 학습 데이터셋의 분할 방식

    # 학습 하이퍼파라미터
    "aux": False,  # 보조 손실 사용 여부
    "aux_weight": 0.4,  # 보조 손실 가중치
    "epochs": 1,  # 학습 에폭 수
    "start_epoch": 0,  # 학습 시작 에폭
    "batch_size": 2,  # 배치 크기
    "lr": 1e-2,  # 학습률
    "momentum": 0.9,  # 모멘텀
    "weight_decay": 1e-4,  # 가중치 감쇠 (L2 정규화)

    # 체크포인트 저장 위치
    "resume": None,  # 기존 모델 체크포인트 (사용하지 않음)
    "save_folder": "./weights",  # 모델 가중치 저장 경로

    # 평가 및 검증 설정
    "eval": False,  # 평가 모드 여부
    "no_val": True,  # 검증 생략 여부
}

# GPU 사용 여부 설정
print(device := torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
cudnn.benchmark = True  # GPU 연산 최적화
args["device"] = device  # 학습에 사용할 디바이스 저장

cuda:0


In [7]:
# 이미지 전처리 변환 설정
input_transform = transforms.Compose([
    transforms.ToTensor(),  # 이미지를 Tensor로 변환
    # transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    transforms.Normalize([0.3257, 0.3690, 0.3223], [0.2112, 0.2148, 0.2115]),  # 이미지 정규화
])
# mean=(0.3257, 0.3690, 0.3223), # city, rgb
# std=(0.2112, 0.2148, 0.2115),

# 데이터셋 로드 (학습 및 검증)
data_kwargs = {"transform": input_transform, "base_size": args["base_size"], "crop_size": args["crop_size"]}
train_dataset = get_segmentation_dataset(args["dataset"], split=args["train_split"], mode="train", root=dataset_root, **data_kwargs)
val_dataset = get_segmentation_dataset(args["dataset"], split="val", mode="val", root=dataset_root, **data_kwargs)

# DataLoader 생성 (데이터 배치 단위로 로딩)
train_loader = data.DataLoader(dataset=train_dataset, batch_size=args["batch_size"], shuffle=True, num_workers=4 # 이거 수정해봐
                               , drop_last=True)
val_loader = data.DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)

# 모델을 GPU 또는 CPU로 이동
dev_mod = model.to(args["device"])

Found 2975 images in /home/segmentsafestep/Fast-SCNN-pytorch-master/datasets/citys/leftImg8bit/train
Found 500 images in /home/segmentsafestep/Fast-SCNN-pytorch-master/datasets/citys/leftImg8bit/val


In [9]:
# 손실 함수 설정 (OHEM Loss 사용)
# criterion = MixSoftmaxCrossEntropyOHEMLoss(aux=args["aux"], aux_weight=args["aux_weight"], ignore_index=-1).to(args["device"])
criterion = MixSoftmaxCrossEntropyLoss(aux=args["aux"], aux_weight=args["aux_weight"], ignore_label=-1).to(args["device"])

# 옵티마이저 (SGD 사용)
optimizer = torch.optim.SGD(model.parameters(), lr=args["lr"], momentum=args["momentum"], weight_decay=args["weight_decay"])

# 학습률 스케줄러 설정 (poly decay 사용)
lr_scheduler = LRScheduler(mode="poly", base_lr=args["lr"], nepochs=args["epochs"],
                           iters_per_epoch=len(train_loader), power=0.9)

# 평가 지표 (mIoU 등)
metric = SegmentationMetric(train_dataset.num_class)

# 최고 성능 저장을 위한 변수
best_pred = 0.0

In [10]:
def train():
    global best_pred  # 최고 성능 비교를 위한 전역 변수 사용
    cur_iters = 0  # 현재 반복 횟수
    start_time = time.time()  # 학습 시작 시간

    # 각 에폭마다 학습 수행
    for epoch in range(args["start_epoch"], args["epochs"]):
        model.train()  # 모델을 학습 모드로 설정

        for i, (images, targets) in enumerate(train_loader):
            cur_lr = lr_scheduler(cur_iters)  # 현재 학습률 설정
            for param_group in optimizer.param_groups:
                param_group["lr"] = cur_lr  # 옵티마이저에 학습률 적용

            # 데이터를 GPU/CPU로 이동
            images, targets = images.to(args["device"]), targets.to(args["device"])

            # 모델 예측 및 손실 계산
            outputs = model(images)
            # outputs = outputs.argmax(dim=1) # BiSeNetV2_zh320
            loss = criterion((outputs,), targets)

            # 역전파 및 최적화 수행
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            cur_iters += 1  # 반복 횟수 증가
            if cur_iters % 200 == 0:  # 10회마다 로그 출력
                print(f"Epoch [{epoch}/{args['epochs']}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}")

        end_time = time.time()  # 에폭 종료 시간 기록
        epoch_time = end_time - start_time  # 소요 시간 계산
        print(f"Epoch {epoch} 완료! 소요 시간: {epoch_time:.2f} 초")

        # 5의 배수 에폭일 때만 저장
        if epoch % 5 == 0:
            save_path = os.path.join(args["save_folder"], f"model_19class_mioutest{epoch}.pth")
            torch.save({"epoch": epoch, "state_dict": model.state_dict(), "optimizer": optimizer.state_dict(), "cur_iters": cur_iters}, save_path)
            print(f"모델 가중치 저장됨: {save_path}")

        # 검증 수행 및 체크포인트 저장
        if not args["no_val"]:
            validation(epoch)

In [11]:
def validation(epoch):
    global best_pred  # 최고 성능 비교를 위한 전역 변수 사용
    model.eval()  # 모델을 평가 모드로 변경
    metric.reset()  # 평가 지표 초기화

    for i, (image, target) in enumerate(val_loader):
        image = image.to(args["device"])  # 이미지를 GPU/CPU로 이동

        with torch.no_grad():  # 그래디언트 계산 비활성화
            outputs = (model(image),)

        pred = torch.argmax(outputs[0], 1).cpu().data.numpy()  # 예측 결과 가져오기
        metric.update(pred, target.numpy())  # 평가 지표 업데이트

    pixAcc, mIoU = metric.get()  # 픽셀 정확도 및 mIoU 계산
    print(f"Epoch {epoch}, Validation PixAcc: {pixAcc:.3f}, mIoU: {mIoU:.3f}")

    new_pred = (pixAcc + mIoU) / 2  # 성능 평가
    if new_pred > best_pred:
        best_pred = new_pred

In [12]:
train()

Epoch [0/1], Step [199/1487], Loss: 1.0644
Epoch [0/1], Step [399/1487], Loss: 0.6966
Epoch [0/1], Step [599/1487], Loss: 2.0011
Epoch [0/1], Step [799/1487], Loss: 1.2523
Epoch [0/1], Step [999/1487], Loss: 0.8930
Epoch [0/1], Step [1199/1487], Loss: 0.7980
Epoch [0/1], Step [1399/1487], Loss: 0.8974
Epoch 0 완료! 소요 시간: 226.88 초
모델 가중치 저장됨: ./weights/model_19class_mioutest0.pth


In [22]:
# 기본 라이브러리
import os
import torch
import torch.utils.data as data
from data_loader import get_segmentation_dataset  # 데이터셋 로드 함수
from utils.metric import SegmentationMetric  # 평가 지표

In [36]:
# 설정 (Jupyter Notebook 환경에 맞게 변경)
args = {
    "dataset": "citys",  # 사용할 데이터셋 (Cityscapes)
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),  # GPU/CPU 설정
    "batch_size": 1,  # 평가 시 배치 크기
    "num_workers": 0,  # DataLoader의 worker 개수 (메모리 문제 방지 위해 0으로 설정)
    "weight_path": "/home/segmentsafestep/Fast-SCNN-pytorch-master/weights/model_19class_mioutest0.pth"  # 저장된 가중치 경로
}

# 가중치 파일 확인
if not os.path.exists(args["weight_path"]):
    raise FileNotFoundError(f"가중치 파일이 존재하지 않습니다: {args['weight_path']}")

# 이미지 전처리 설정
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])

# 검증 데이터셋 로드 (Cityscapes val set)
val_dataset = get_segmentation_dataset(args["dataset"], split="val", mode="testval", transform=input_transform, root="/home/segmentsafestep/Fast-SCNN-pytorch-master/datasets")
val_loader = data.DataLoader(
    dataset=val_dataset,
    batch_size=args["batch_size"],
    shuffle=False,
    num_workers=args["num_workers"]
)

# 모델 및 가중치 로드
# model = BiSeNetv2(num_class=val_dataset.num_class).to(args["device"])  # 모델 생성

# 저장된 가중치 파일 로드
checkpoint = torch.load(args["weight_path"], map_location=args["device"])

# 가중치 파일에 state_dict가 포함되어 있으면, 모델 가중치만 추출하여 로드
if "state_dict" in checkpoint:
    model.load_state_dict(checkpoint["state_dict"])  # state_dict만 로드
else:
    model.load_state_dict(checkpoint)  # 이미 순수한 가중치 파일이면 그대로 로드

model.eval()  # 모델을 평가 모드로 설정
print("모델 및 가중치 로드 완료!")

# 평가 지표 설정 (mIoU 및 픽셀 정확도)
metric = SegmentationMetric(val_dataset.num_class)

Found 500 images in /home/segmentsafestep/Fast-SCNN-pytorch-master/datasets/citys/leftImg8bit/val
모델 및 가중치 로드 완료!


In [37]:
# 검증 수행 및 mIoU 계산
print("검증 데이터셋 평가 중...")
with torch.no_grad():
    for i, (image, label) in enumerate(val_loader):
        image = image.to(args["device"])
        label = label.cpu().numpy().squeeze()  # GPU -> CPU 후 불필요한 차원 제거

        # 모델 예측 수행
        outputs = model(image)

        # 모델 출력이 (C, W)로 잘못 나오는 경우, 올바르게 변환
        if len(outputs.shape) == 3:  # (C, H, W) 형태라면 batch 차원이 없음 -> 추가
            outputs = outputs.unsqueeze(0)  # (1, C, H, W) 형태로 변환
        elif len(outputs.shape) == 2:  # (C, W) 형태라면 차원 재배열 필요
            outputs = outputs.view(1, outputs.shape[0], 1, outputs.shape[1])  # (1, C, 1, W)로 변환
        
        # 예측 결과 계산
        pred = torch.argmax(outputs, dim=1).cpu().numpy().squeeze()  # (H, W) 형태로 변환
        
        # 평가 지표 업데이트
        metric.update(pred, label)

        # 10개마다 중간 결과 출력
        if (i + 1) % 10 == 0:
            pixAcc, mIoU = metric.get()
            print(f"Sample {i+1}: Pixel Accuracy = {pixAcc * 100:.2f}%, mIoU = {mIoU * 100:.2f}%")

# 최종 평가 결과 출력
final_pixAcc, final_mIoU = metric.get()
print("\n최종 평가 결과")
print(f"Pixel Accuracy: {final_pixAcc * 100:.2f}%")
print(f"Mean IoU (mIoU): {final_mIoU * 100:.2f}%")

검증 데이터셋 평가 중...
Sample 10: Pixel Accuracy = 67.37%, mIoU = 16.02%
Sample 20: Pixel Accuracy = 65.69%, mIoU = 15.06%
Sample 30: Pixel Accuracy = 63.89%, mIoU = 15.34%


KeyboardInterrupt: 