In [1]:
import torch

if torch.cuda.is_available():
    print("GPU is available.")
    device = torch.device("cuda")
else:
    print("GPU is not available.")
    device = torch.device("cpu")

GPU is available.


In [2]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [3]:
# 필요한 라이브러리 import 

import os
import sys
import cv2
import numpy as np
from typing import Tuple, Sequence, Callable, Dict

import torch
from torch import Tensor
from torch.utils.data import Dataset

from torch import nn
# from torchvision.models.detection import keypointrcnn_resnet50_fpn
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection import KeypointRCNN

import pandas as pd
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

from typing import Tuple 
import albumentations as A
from albumentations.pytorch import ToTensorV2

import time
import torch.optim as optim



In [4]:
# 학습할 수 있도록 데이터 전처리 클래스

class KeypointDataset(Dataset):
    def __init__(
        self,
        image_dir: os.PathLike,
        label_df: pd.DataFrame,
        transforms: Sequence[Callable]=None
    ) -> None:
        self.image_dir = image_dir
        self.df = label_df
        self.transforms = transforms

    def __len__(self) -> int:
        return self.df.shape[0]
    
    def __getitem__(self, index: int) -> Tuple[Tensor, Dict]:
        image_id = self.df.iloc[index, 1]
        labels = np.array([1])

        keypoints = self.df.iloc[index, 2:].values.reshape(-1, 2).astype(np.int64)
    
        x1, y1 = min(keypoints[:, 0]), min(keypoints[:, 1])
        x2, y2 = max(keypoints[:, 0]), max(keypoints[:, 1])
        

        
        boxes = np.array([[x1, y1, x2, y2]], dtype=np.int64)
        image_path = os.path.join(self.image_dir, image_id)
        img_array = np.fromfile(image_path, np.uint8)
        image = cv2.imdecode(img_array, cv2.COLOR_BGR2RGB)
        targets ={
            'image': image,
            'bboxes': boxes,
            'labels': labels,
            'keypoints': keypoints
        }
#         print('bboxes : ' , boxes)
        
        if self.transforms is not None:
            targets = self.transforms(**targets)

        image = targets['image']
        image = image / 255.0
   

        targets = {
            'labels': torch.as_tensor(targets['labels'], dtype=torch.int64),
            'boxes': torch.as_tensor(targets['bboxes'], dtype=torch.float32),
            'keypoints': torch.as_tensor(
                np.concatenate([targets['keypoints'], np.ones((15, 1))], axis=1)[np.newaxis], dtype=torch.float32
            )
        }

        return image, targets

In [5]:
# 데이터를 저장한 파일을 불러와서 학습용/ 검즘용 데이터로 나눠서 전처리 함수에 전달

def collate_fn(batch: torch.Tensor)->Tuple:
    return tuple(zip(*batch))

# Data Transform & Train-Test-Split
def load_data(image_dir, train_key, valid_key):
    transforms = A.Compose([
        # A.Resize(500, 500, always_apply=True),
        A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2()
    ],  bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
        keypoint_params=A.KeypointParams(format='xy')
    )

    trainset = KeypointDataset(image_dir, train_key, transforms)
    validset = KeypointDataset(image_dir, valid_key, transforms)
    train_loader = DataLoader(trainset, batch_size=4, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(validset, batch_size=4, shuffle=False, collate_fn=collate_fn)

    return train_loader, valid_loader

In [6]:
def get_model() -> nn.Module:
    backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers = 2) # resnet101, resnet152 
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2
    )

    keypoint_roi_pooler = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=14,
        sampling_ratio=2
    )

    model = KeypointRCNN(
        backbone, 
        num_classes=2,
        num_keypoints=15,
        box_roi_pool=roi_pooler,
        keypoint_roi_pool=keypoint_roi_pooler
    )

    return model

In [7]:
def train(model, train_loader, optimizer, epoch, device = 'cuda'):
    model.train()                                        
    total_loss = 0
    for batch_idx, (images, targets) in enumerate(train_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]  

        optimizer.zero_grad()
        losses = model(images, targets)
        loss = losses['loss_keypoint']
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if (batch_idx+1) % 200 == 0:
            print(f'| epoch: {epoch} | batch: {batch_idx+1}/{len(train_loader)} | batch loss: {loss.item()}')

    return total_loss / len(train_loader)

def evaluate(model, test_loader, device = 'cuda'):
    model.train()      
    test_loss = 0      # test_loss 초기화
    
    with torch.no_grad(): 
        for images, targets in test_loader:
            # data, target 값 DEVICE에 할당
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]  

            losses = model(images, targets)                       # validation loss
            test_loss += float(losses['loss_keypoint'])           # sum of all loss 
    
    test_loss /= len(test_loader.dataset)                         # 평균 loss
    return test_loss

def train_model(train_loader, val_loader, model_path=None, num_epochs=30, device='cuda'):
    if not os.path.exists(model_path):
        model = get_model()
    else:
        model = torch.load(model_path)
    model.to(device)
    
    best_loss = 999999  # initialize best loss
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)

    for epoch in range(1, num_epochs+1):
        since = time.time()
        train(model, train_loader, optimizer, epoch, device)
        train_loss = train(model, train_loader, optimizer, epoch, device)
        val_loss = evaluate(model, val_loader)
        print('Train Keypoint Loss (avg): {:.4f}'.format(train_loss))

        if val_loss <= best_loss:   # update best loss
            best_loss = val_loss
            torch.save(model, './models/RCNN_ep'+str(epoch)+'_'+str(best_loss)+'.pt')
            print('Best Model Saved, Loss: ', val_loss)
        
        time_elapsed = time.time()-since
        print()
        print('---------------------- epoch {} ------------------------'.format(epoch))
        print('Train Keypoint Loss: {:.4f}, Val Keypoint Loss: {:.4f}'.format(train_loss, val_loss))   
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed//60, time_elapsed%60))
        print()

def main():
    current_folder = globals()['_dh'][0]
    path = os.path.dirname(os.path.join(current_folder,''))
    os.chdir(path)
    
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    train_img_path = './images'
    train_key_path = './filename.csv'

    # Load the entire DataFrame and split it into parts.
    total_df = pd.read_csv(train_key_path)
    num_parts = 10
    data_parts = np.array_split(total_df, num_parts)

    model_path = None

    # Train on each part sequentially.
    for i in range(num_parts):
        print(f"Training on part {i + 1}/{num_parts}")
        train_key, valid_key = train_test_split(data_parts[i], test_size=0.2, random_state=42)
        train_loader, valid_loader = load_data(train_img_path, train_key, valid_key)

        model_path = f"./models/RCNN_part{i + 1}.pt"
        train_model(train_loader, valid_loader, model_path=model_path, num_epochs=30, device=DEVICE)
    '''
    default: epoch - 30, 
             device - cuda
    '''


In [None]:
main()

Training on part 1/10




| epoch: 1 | batch: 200/9771 | batch loss: 7.599041938781738
| epoch: 1 | batch: 400/9771 | batch loss: 7.365864276885986
| epoch: 1 | batch: 600/9771 | batch loss: 7.076773166656494
| epoch: 1 | batch: 800/9771 | batch loss: 7.224649429321289
| epoch: 1 | batch: 1000/9771 | batch loss: 7.002455234527588
| epoch: 1 | batch: 1200/9771 | batch loss: 6.897069931030273
| epoch: 1 | batch: 1400/9771 | batch loss: 7.4398603439331055
| epoch: 1 | batch: 1600/9771 | batch loss: 7.121744155883789
| epoch: 1 | batch: 1800/9771 | batch loss: 6.6767258644104
| epoch: 1 | batch: 2000/9771 | batch loss: 6.605391979217529
| epoch: 1 | batch: 2200/9771 | batch loss: 6.690028190612793
| epoch: 1 | batch: 2400/9771 | batch loss: 6.499427795410156
| epoch: 1 | batch: 2600/9771 | batch loss: 6.205638408660889
| epoch: 1 | batch: 2800/9771 | batch loss: 6.118190765380859
| epoch: 1 | batch: 3000/9771 | batch loss: 6.384411334991455
| epoch: 1 | batch: 3200/9771 | batch loss: 6.938908576965332
| epoch: 1 | 

| epoch: 2 | batch: 6800/9771 | batch loss: 4.596799850463867
| epoch: 2 | batch: 7000/9771 | batch loss: 4.369269371032715
| epoch: 2 | batch: 7200/9771 | batch loss: 5.125364303588867
| epoch: 2 | batch: 7400/9771 | batch loss: 4.414334774017334
| epoch: 2 | batch: 7600/9771 | batch loss: 4.27556848526001
| epoch: 2 | batch: 7800/9771 | batch loss: 4.216588973999023
| epoch: 2 | batch: 8000/9771 | batch loss: 4.8356709480285645
| epoch: 2 | batch: 8200/9771 | batch loss: 5.312934398651123
| epoch: 2 | batch: 8400/9771 | batch loss: 5.856332778930664
| epoch: 2 | batch: 8600/9771 | batch loss: 4.756992340087891
| epoch: 2 | batch: 8800/9771 | batch loss: 4.028939247131348
| epoch: 2 | batch: 9000/9771 | batch loss: 5.43113374710083
| epoch: 2 | batch: 9200/9771 | batch loss: 5.174344539642334
| epoch: 2 | batch: 9400/9771 | batch loss: 5.216297626495361
| epoch: 2 | batch: 9600/9771 | batch loss: 5.576000213623047
| epoch: 2 | batch: 200/9771 | batch loss: 3.7405078411102295
| epoch: 

| epoch: 3 | batch: 3800/9771 | batch loss: 4.274242877960205
| epoch: 3 | batch: 4000/9771 | batch loss: 4.812987804412842
| epoch: 3 | batch: 4200/9771 | batch loss: 4.016681671142578
| epoch: 3 | batch: 4400/9771 | batch loss: 4.905849456787109
| epoch: 3 | batch: 4600/9771 | batch loss: 4.50931978225708
| epoch: 3 | batch: 4800/9771 | batch loss: 4.168575286865234
| epoch: 3 | batch: 5000/9771 | batch loss: 4.111140251159668
| epoch: 3 | batch: 5200/9771 | batch loss: 5.106230735778809
| epoch: 3 | batch: 5400/9771 | batch loss: 5.177259922027588
| epoch: 3 | batch: 5600/9771 | batch loss: 4.2205610275268555
| epoch: 3 | batch: 5800/9771 | batch loss: 3.629570484161377
| epoch: 3 | batch: 6000/9771 | batch loss: 4.4790425300598145
| epoch: 3 | batch: 6200/9771 | batch loss: 4.2080278396606445
| epoch: 3 | batch: 6400/9771 | batch loss: 4.340909481048584
| epoch: 3 | batch: 6600/9771 | batch loss: 3.6669974327087402
| epoch: 3 | batch: 6800/9771 | batch loss: 5.211674213409424
| epo

| epoch: 5 | batch: 200/9771 | batch loss: 4.3098530769348145
| epoch: 5 | batch: 400/9771 | batch loss: 4.503491401672363
| epoch: 5 | batch: 600/9771 | batch loss: 3.634697675704956
| epoch: 5 | batch: 800/9771 | batch loss: 4.908721446990967
| epoch: 5 | batch: 1000/9771 | batch loss: 3.170208215713501
| epoch: 5 | batch: 1200/9771 | batch loss: 4.877126693725586
| epoch: 5 | batch: 1400/9771 | batch loss: 3.359387159347534
| epoch: 5 | batch: 1600/9771 | batch loss: 4.196415424346924
| epoch: 5 | batch: 1800/9771 | batch loss: 3.23534893989563
| epoch: 5 | batch: 2000/9771 | batch loss: 4.375489234924316
| epoch: 5 | batch: 2200/9771 | batch loss: 4.165219306945801
| epoch: 5 | batch: 2400/9771 | batch loss: 2.84057354927063
| epoch: 5 | batch: 2600/9771 | batch loss: 4.901301383972168
| epoch: 5 | batch: 2800/9771 | batch loss: 3.4888041019439697
| epoch: 5 | batch: 3000/9771 | batch loss: 3.768826723098755
| epoch: 5 | batch: 3200/9771 | batch loss: 4.0182905197143555
| epoch: 5 