참고 : 
- https://hwanny-yy.tistory.com/3
- https://dacon.io/competitions/official/235672/codeshare/1795

In [1]:
import torch
import cv2
import random
import numpy as np
import config

from PIL import Image
import torchvision.transforms.functional as TF

from dataset import ImgDataset, FashionDataset
from util.utils import tensor2img, apply_mask, Compose, Resize, ToTensor, collate_fn
import config

import torch
from torch.utils.data import DataLoader
from torch.optim import Adam

from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import os
import numpy as np

from PIL import Image
from pycocotools.coco import COCO

import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

  warn(f"Failed to load image Python extension: {e}")


# Dataset

In [2]:
# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

- 크롤링한 이미지는 모두 `data/` 폴더 바로 아래에 모여 있는 구조
- 이미지마다 크기가 다르므로 로드할 때 동일한 크기로 맞춰야 함

## Crawling Dataset

In [3]:
class ImgDataset(Dataset):
    """In-memory dataset of the image files stored directly under ``data/``.

    Every image is opened, converted to RGB, resized to a square and
    transformed once at construction time; ``__getitem__`` only moves the
    cached result to the module-level ``device``.

    Args:
        max_size: Edge length in pixels of the square each image is resized to.
        transforms_: List of torchvision transforms applied to each resized
            PIL image. When omitted, ``transforms.ToTensor()`` is used so
            that ``__getitem__`` can still move the sample to ``device``.
    """

    def __init__(self, max_size, transforms_=None):
        if transforms_ is None:
            transforms_ = [transforms.ToTensor()]
        self.transform = transforms.Compose(transforms_)

        files = []
        # Sorted so that sample indices are reproducible across runs
        # (os.listdir returns entries in arbitrary order).
        for file in sorted(os.listdir('data/')):
            path = os.path.join('data', file)
            # Skip sub-directories: the training set lives in data/fashion.
            if not os.path.isfile(path):
                continue
            # Images come in different sizes, so force one common size here.
            # RGB conversion keeps the channel count identical for
            # grayscale, palette and RGBA inputs.
            with Image.open(path) as raw:
                img = raw.convert('RGB').resize((max_size, max_size), Image.LANCZOS)
            files.append(self.transform(img))

        self.files = files

    def __getitem__(self, index):
        return self.files[index].to(device)

    def __len__(self):
        return len(self.files)

## Train Dataset

In [None]:
class FashionDataset(Dataset):
    """COCO-format instance-segmentation dataset.

    Args:
        path: Path of the COCO annotation JSON file.
        transforms: Optional callable taking and returning an
            ``(image, target)`` pair.
        image_dir: Directory that the ``file_name`` entries of the annotation
            file are relative to. Defaults to the location that used to be
            hard-coded.

    Each sample is ``(image, target)`` where ``target`` holds the tensors
    ``boxes`` (float32, x1/y1/x2/y2), ``masks`` (uint8), ``labels`` (int64),
    ``area`` (float32) and ``iscrowd`` (uint8).
    """

    def __init__(self, path, transforms=None, image_dir='./data/fashion/train'):
        self.coco = COCO(path)
        # Image ids are taken from the annotation index of the COCO object.
        self.image_ids = list(self.coco.imgToAnns.keys())
        self.transforms = transforms
        self.image_dir = image_dir

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        file_name = self.coco.loadImgs(image_id)[0]['file_name']
        # Open inside a context manager so the file handle is released as
        # soon as the RGB copy exists.
        with Image.open(os.path.join(self.image_dir, file_name)) as raw:
            image = raw.convert('RGB')

        annot_ids = self.coco.getAnnIds(imgIds=image_id)
        annots = [x for x in self.coco.loadAnns(annot_ids) if x['image_id'] == image_id]

        # COCO boxes are (x, y, width, height); convert to corner format.
        # reshape keeps the array 2-D even when there are no annotations.
        boxes = np.array([annot['bbox'] for annot in annots], dtype=np.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]

        labels = np.array([annot['category_id'] for annot in annots], dtype=np.int32)
        masks = np.array([self.coco.annToMask(annot) for annot in annots], dtype=np.uint8)

        area = np.array([annot['area'] for annot in annots], dtype=np.float32)
        iscrowd = np.array([annot['iscrowd'] for annot in annots], dtype=np.uint8)

        target = {
            'boxes': boxes,
            'masks': masks,
            'labels': labels,
            'area': area,
            'iscrowd': iscrowd}

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        # Cast every field to the tensor dtype used for training.
        target['boxes'] = torch.as_tensor(target['boxes'], dtype=torch.float32)
        target['masks'] = torch.as_tensor(target['masks'], dtype=torch.uint8)
        target['labels'] = torch.as_tensor(target['labels'], dtype=torch.int64)
        target['area'] = torch.as_tensor(target['area'], dtype=torch.float32)
        target['iscrowd'] = torch.as_tensor(target['iscrowd'], dtype=torch.uint8)

        return image, target

# Util

## Crawling Data Process

### Tensor > Image

In [5]:
def tensor2img(tensor):
    """Convert the first image of a batch from [-1, 1] floats to a uint8 array.

    Args:
        tensor: Batch tensor indexed as ``tensor[0]`` -> (channels, height,
            width), with values nominally in [-1, 1].

    Returns:
        ``numpy.uint8`` array of shape (height, width, channels) in [0, 255].
    """
    first = tensor[0].detach().cpu().float().numpy()
    scaled = 127.5 * (first + 1.0)
    # Clip before the cast: values outside [-1, 1] would otherwise wrap
    # around when converted to uint8.
    img = np.clip(scaled, 0, 255).astype(np.uint8)
    return np.transpose(img, (1, 2, 0))

### Mask 시각화

In [6]:
def apply_mask(image, mask, labels, boxes, file_name):
    """Draw instance masks, boxes and class names on an image and save it.

    Args:
        image: Image array to draw on (as returned by ``tensor2img`` in the
            inference loop).
        mask: 4-D array (instances, 1, height, width); values above 0.5 are
            treated as foreground.
        labels: Per-instance class indices; instances labelled 0 are skipped.
        boxes: Per-instance ``[(x1, y1), (x2, y2)]`` integer corner pairs.
        file_name: Stem of the PNG written to ``save/``.
    """
    # NOTE(review): `class_names` is read as a module-level global that is not
    # defined in the visible part of this file - confirm where it comes from.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    alpha = 1
    beta = 0.6  # transparency for the segmentation map
    gamma = 0  # scalar added to each sum
    COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))
    _, _, height, width = mask.shape
    segmentation_map = np.zeros((height, width, 3), np.uint8)

    for n in range(mask.shape[0]):
        if labels[n] == 0:
            continue

        color = COLORS[random.randrange(0, len(COLORS))]
        # Threshold once and reuse it for the three colour channels.
        foreground = mask[n] > 0.5
        class_color = COLORS[labels[n]]
        for channel in range(3):
            segmentation_map[:, :, channel] = np.where(foreground, class_color[channel], 0)
        image = cv2.addWeighted(image, alpha, segmentation_map, beta, gamma, dtype=cv2.CV_8U)

        # draw the bounding boxes around the objects
        cv2.rectangle(image, boxes[n][0], boxes[n][1], color=color, thickness=2)

        print(class_names[labels[n]])
        # put the label text above the objects
        cv2.putText(image, class_names[labels[n]], (boxes[n][0][0], boxes[n][0][1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, color,
                    thickness=2, lineType=cv2.LINE_AA)

    # cv2.imwrite does not create missing directories, so make sure the
    # output folder exists before saving.
    os.makedirs('save', exist_ok=True)
    cv2.imwrite(f'save/{file_name}.png', image)

## Train Data Process

In [None]:
class Compose:
    """Chain paired transforms, each mapping ``(image, target)`` to a new pair."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        sample = (image, target)
        for step in self.transforms:
            # Each step must return exactly two values.
            new_image, new_target = step(*sample)
            sample = (new_image, new_target)
        return sample


class Resize:
    """Resize a PIL image together with its instance masks and boxes.

    Args:
        size: Target ``(width, height)`` in pixels, the order used by
            ``PIL.Image.resize``.
        interpolation: PIL resampling filter applied to the image.
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, image, target):
        old_w, old_h = image.size
        new_w, new_h = self.size
        image = image.resize(self.size, resample=self.interpolation)

        source_masks = target['masks']
        # PIL sizes are (width, height) while arrays are (rows, cols), i.e.
        # (height, width); allocating in that order keeps non-square target
        # sizes working.
        masks = np.zeros((len(source_masks), new_h, new_w), dtype=np.uint8)
        for i, single in enumerate(source_masks):
            resized = Image.fromarray(single).resize(self.size, resample=Image.BILINEAR)
            masks[i] = np.array(resized, dtype=np.uint8)

        target['masks'] = masks
        # Boxes are x1/y1/x2/y2: scale x by the width ratio, y by the height ratio.
        target['boxes'][:, [0, 2]] *= new_w / old_w
        target['boxes'][:, [1, 3]] *= new_h / old_h

        return image, target


class ToTensor:
    """Paired transform: convert the image with ``TF.to_tensor``, pass the target through."""

    def __call__(self, image, target):
        tensor = TF.to_tensor(image)
        return tensor, target


def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    ``[(img1, tgt1), (img2, tgt2)]`` becomes ``((img1, img2), (tgt1, tgt2))``.
    """
    columns = zip(*batch)
    return tuple(columns)

# Train(Open source)

## HyperParameter

In [7]:
# Optimisation settings read from the project config module.
lr, num_epochs, batch_size = config.lr, config.num_epochs, config.batch_size

# Settings used when building the model heads.
hidden_layer = config.hidden_layer
classes = config.classes
num_classes = len(classes)

# Image size used when resizing inputs, and the score cut-off used at inference.
max_size, score_threshold = config.max_size, config.score_threshold

## Model

In [None]:
# Start from the pretrained Mask R-CNN and replace both ROI heads so that they
# predict this project's classes.
model = maskrcnn_resnet50_fpn(pretrained=True)

# torchvision detection heads count the background as class 0, so both heads
# get the same background-inclusive number of outputs.
# NOTE(review): assumes `classes` lists foreground categories only; if it
# already contains a background entry, the extra output is simply unused.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(classes) + 1)
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, len(classes) + 1)
model.to(device)

## 데이터 셋 로드

In [8]:
# Paired (image, target) pipeline: resize to a square, then convert the image to a tensor.
transform = Compose([
    Resize((max_size, max_size)),
    ToTensor(),
])
train_dataset = FashionDataset('data/fashion/train.json', transforms=transform)
# collate_fn keeps each batch as tuples of samples instead of stacking them.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

## 학습

In [None]:
# NOTE(review): no optimizer is created anywhere in the visible notebook even
# though the loop below uses one; Adam with the configured learning rate is
# assumed because both are already imported/defined above - confirm.
optimizer = Adam(model.parameters(), lr=lr)

model.train()
for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of individual loss terms;
        # the optimised objective is their sum.
        losses = model(images, targets)
        loss = sum(losses.values())

        print(
            f"{epoch}, {i}, C: {losses['loss_classifier'].item():.5f}, M: {losses['loss_mask'].item():.5f}, "
            f"B: {losses['loss_box_reg'].item():.5f}, O: {losses['loss_objectness'].item():.5f}, T: {loss.item():.5f}")
        loss.backward()
        optimizer.step()

# 실제 데이터(Crawling Data)

## 데이터 셋 로드

In [None]:
# ImgDataset takes (max_size, transforms_) and expects a list of torchvision
# image transforms, not the paired (image, target) pipeline used for training.
# ToTensor alone matches the [0, 1] input range the model is trained on.
# NOTE(review): tensor2img maps [-1, 1] to [0, 255], so previews of [0, 1]
# inputs will look brighter than the source - confirm the intended
# normalisation.
test_dataset = ImgDataset(max_size, [transforms.ToTensor()])
# No shuffling at inference time, so saved file indices follow dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## 테스트

In [9]:
model.eval()
saved = 0  # running index used to name the output files
with torch.no_grad():  # inference only: no autograd graph is needed
    for data in test_dataloader:
        results = model(data)

        # Handle every image of the batch rather than only the first one.
        for offset, result in enumerate(results):
            # tensor2img reads element 0, so pass a one-image slice of the batch.
            image = tensor2img(data[offset:offset + 1])

            # Keep only detections scoring above the threshold; masks, labels
            # and boxes are filtered with the same selection so they stay aligned.
            keep = result['scores'] > score_threshold
            mask = result['masks'][keep].float().cpu().numpy()
            labels = result['labels'][keep].tolist()
            boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))] for b in result['boxes'][keep]]

            apply_mask(image, mask, labels, boxes, saved)
            saved += 1

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


RuntimeError: Couldn't load custom C++ ops. This can happen if your PyTorch and torchvision versions are incompatible, or if you had errors while compiling torchvision from source. For further information on the compatible versions, check https://github.com/pytorch/vision#installation for the compatibility matrix. Please check your PyTorch version with torch.__version__ and your torchvision version with torchvision.__version__ and verify if they are compatible, and if not please reinstall torchvision so that it matches your PyTorch install.