## Import

In [4]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


## Utils

In [5]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a 2-D binary array.

    The encoding is a whitespace-separated sequence of ``start length`` pairs,
    where ``start`` is a 1-based index into the row-major flattened mask.

    Args:
        mask_rle: RLE string. ``None``, NaN, ``''`` and the sentinel ``-1``
            (as an int or as the string ``'-1'``) all mean "no foreground
            pixels" and yield an all-zero mask.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of shape ``shape`` holding 1 inside the runs and
        0 everywhere else.

    Raises:
        ValueError: if the tokens do not form complete ``start length`` pairs.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # pandas yields NaN (a float) for empty CSV cells; treat it like "no mask".
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        tokens = []
    else:
        tokens = str(mask_rle).split()

    # -1 is the "no foreground" sentinel; it is not a real run.
    if tokens == ['-1']:
        tokens = []

    if len(tokens) % 2 != 0:
        raise ValueError(
            f"RLE must contain 'start length' pairs, got {len(tokens)} tokens"
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and each run of foreground
    pixels is written as ``start length`` with a 1-based ``start``.

    Args:
        mask: NumPy array containing 0/1 values.

    Returns:
        Space-separated ``start length`` pairs, or ``''`` when the mask has
        no foreground pixels.
    """
    # Zero sentinels on both ends make every run produce a rising and a
    # falling edge, even when it touches the border of the mask.
    padded = np.concatenate(([0], mask.flatten(), [0]))
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd positions hold run ends; turn them into run lengths in place.
    edges[1::2] -= edges[0::2]
    return ' '.join(map(str, edges))

## Custom Dataset

In [6]:
class SatelliteDataset(Dataset):
    """Dataset of images (and optionally RLE masks) listed in a CSV file.

    The CSV is read positionally: column 1 holds the image path and column 2
    holds the run-length-encoded mask.

    Args:
        csv_file: Path of the CSV file describing the samples.
        transform: Optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)`` that returns a mapping with
            ``'image'`` (and ``'mask'``) entries.
        infer: When True, masks are not read and ``__getitem__`` returns only
            the image.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The image alone when ``infer`` is True, otherwise an
            ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: if the image file is missing or unreadable.
        """
        img_path = self.data.iloc[idx, 1]

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(
                f"Could not read image for sample {idx}: {img_path!r}"
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        # The mask is decoded at the size of the image as loaded from disk.
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            # Image and mask go through the transform together so that
            # geometric operations stay aligned.
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']
        return image, mask

## Data Loader

In [7]:
# Preprocessing shared by training and inference: resize to 224x224,
# normalize with the library defaults, then convert to a torch tensor.
transform = A.Compose([A.Resize(224, 224), A.Normalize(), ToTensorV2()])

# Training samples are listed in a CSV file.
dataset = SatelliteDataset(
    csv_file='./train_all_img_except_5percent.csv',
    transform=transform,
)

# Shuffled mini-batches of 16 samples for training.
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

print("Dataset Size:", len(dataset))

Dataset Size: 2929


In [8]:
# train_features, train_labels = next(iter(dataloader))
# print(train_features[0])
# print(train_labels[0])

## Define Model - Unet

In [9]:
# Double convolution block, the basic building unit of U-Net.
def double_conv(in_channels, out_channels):
    """Build two consecutive 3x3 convolutions, each followed by ReLU.

    ``padding=1`` keeps the spatial size unchanged, so only the channel count
    goes from ``in_channels`` to ``out_channels``.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, padding=1),
        nn.ReLU(inplace=True)
    )


# Simple U-Net model definition
class UNet(nn.Module):
    """Small U-Net with three down-sampling and three up-sampling stages.

    Args:
        in_channels: Number of channels of the input image (default 3).
        out_channels: Number of output channels (default 1).

    The forward pass returns raw logits with the same spatial size as the
    input. Height and width may be any value of at least 8; they do not need
    to be multiples of 8 because each decoder stage is resized to the exact
    size of the skip connection it is concatenated with.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()
        self.dconv_down1 = double_conv(in_channels, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)

        self.maxpool = nn.MaxPool2d(2)

        # Decoder inputs are the upsampled features concatenated with the
        # matching encoder features, hence the summed channel counts.
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)

        self.conv_last = nn.Conv2d(64, out_channels, 1)

    @staticmethod
    def _upsample_like(x, reference):
        """Bilinearly resize ``x`` to the spatial size of ``reference``."""
        # Targeting the skip tensor's size (instead of a fixed factor of 2)
        # keeps torch.cat valid when max-pooling rounded an odd size down.
        return nn.functional.interpolate(
            x, size=reference.shape[-2:], mode='bilinear', align_corners=True
        )

    def forward(self, x):
        # Encoder: keep each stage's output for the skip connections.
        conv1 = self.dconv_down1(x)
        x = self.maxpool(conv1)

        conv2 = self.dconv_down2(x)
        x = self.maxpool(conv2)

        conv3 = self.dconv_down3(x)
        x = self.maxpool(conv3)

        # Bottleneck
        x = self.dconv_down4(x)

        # Decoder: upsample, concatenate the skip connection, convolve.
        x = self._upsample_like(x, conv3)
        x = torch.cat([x, conv3], dim=1)
        x = self.dconv_up3(x)

        x = self._upsample_like(x, conv2)
        x = torch.cat([x, conv2], dim=1)
        x = self.dconv_up2(x)

        x = self._upsample_like(x, conv1)
        x = torch.cat([x, conv1], dim=1)
        x = self.dconv_up1(x)

        # 1x1 convolution to the requested number of output channels (logits).
        out = self.conv_last(x)

        return out

## Model Train - Unet

In [10]:
# Initialize the model and move it to the selected device
model = UNet().to(device)

# Define the loss function and the optimizer.
# BCEWithLogitsLoss takes raw logits, so no sigmoid is applied before the loss.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# training loop
for epoch in range(10):  # train for 10 epochs
    model.train()
    epoch_loss = 0  # running sum of the per-batch losses for this epoch
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(images)
        # Insert a channel axis at dim 1 so the target lines up with the
        # model's single-channel output.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 184/184 [03:35<00:00,  1.17s/it]


Epoch 1, Loss: 0.3068291569209617


100%|██████████| 184/184 [02:55<00:00,  1.05it/s]


Epoch 2, Loss: 0.20318827008747536


100%|██████████| 184/184 [02:56<00:00,  1.04it/s]


Epoch 3, Loss: 0.18272510802616243


100%|██████████| 184/184 [02:56<00:00,  1.04it/s]


Epoch 4, Loss: 0.170081102086798


100%|██████████| 184/184 [02:55<00:00,  1.05it/s]


Epoch 5, Loss: 0.15697910612368066


100%|██████████| 184/184 [02:54<00:00,  1.05it/s]


Epoch 6, Loss: 0.14967667270937693


100%|██████████| 184/184 [02:54<00:00,  1.06it/s]


Epoch 7, Loss: 0.13962409264691497


100%|██████████| 184/184 [02:55<00:00,  1.05it/s]


Epoch 8, Loss: 0.13978439658556296


100%|██████████| 184/184 [02:55<00:00,  1.05it/s]


Epoch 9, Loss: 0.13168264787806117


100%|██████████| 184/184 [02:53<00:00,  1.06it/s]

Epoch 10, Loss: 0.1332763957021677





## Inference - Unet

In [11]:
# infer=True makes the dataset return images only (no masks are read).
test_dataset = SatelliteDataset(
    csv_file='./test.csv',
    transform=transform,
    infer=True,
)
# shuffle=False keeps predictions in the same order as the rows of the CSV.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
)

In [12]:
# Predict a mask for every test image and collect one RLE entry per image,
# in dataloader order.
model.eval()
result = []

with torch.no_grad():
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        # Logits -> probabilities, drop the channel axis, then binarize.
        masks = torch.sigmoid(outputs).cpu().numpy()
        masks = (np.squeeze(masks, axis=1) > 0.35).astype(np.uint8) # Threshold = 0.35

        for mask in masks:
            mask_rle = rle_encode(mask)
            # No building pixels predicted at all -> store -1 instead of ''.
            result.append(-1 if mask_rle == '' else mask_rle)

100%|██████████| 3790/3790 [16:25<00:00,  3.85it/s]


## Submission

In [14]:
# Start from the sample submission and replace its mask_rle column with the
# predictions collected during inference (one entry per row, in order).
submit = pd.read_csv('./sample_submission.csv').assign(mask_rle=result)

In [15]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
submit.to_csv('./submit.csv', index=False)