In [83]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
from PIL import Image
import json

import torch
import torch.nn as nn
import torch.utils.data as D
import torch.nn.functional as F
import torchvision.transforms as T
import torchvision.models as models


import timm
from sklearn.preprocessing import MultiLabelBinarizer

In [6]:
# Query timm's model registry for the 'convnext_base' name to confirm that the
# installed timm version provides the architecture used later in this notebook.
timm.list_models('convnext_base')

['convnext_base']

In [7]:
class CustomDataset(D.Dataset):
    """Image dataset that pairs image files on disk with precomputed labels.

    Args:
        path: Directory that contains the image files.
        data: Sequence of image file names, relative to ``path``.
        label: Sequence of labels, index-aligned with ``data``.
        transform: Optional callable applied to the loaded RGB PIL image.

    Each item is an ``(image, label)`` tuple: ``image`` is the transformed
    image (or the RGB PIL image when no transform is given) and ``label`` is
    ``label[idx]`` unchanged.
    """
    def __init__(self, path, data, label, transform=None):
        self.path = path            # directory holding the images
        self.data = data            # image file names
        self.label = label          # labels, aligned with ``data``
        self.transform = transform  # optional image transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # os.path.join works whether or not ``path`` ends with a separator.
        image_path = os.path.join(self.path, self.data[idx])

        # Always hand a 3-channel image to the transform: PNG files may be
        # stored as RGBA, palette or grayscale, which would not match a
        # 3-channel normalisation. convert() returns a fully loaded copy, so
        # the file handle can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.label[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [84]:
class My_model(nn.Module):
    """Multi-label classifier: a backbone followed by a linear head and a sigmoid.

    Args:
        pretrained: Backbone module. Its output must provide ``in_features``
            values per sample.
        num_classes: Number of independent labels to predict. Defaults to 8,
            the value that used to be hard-coded.
        in_features: Size of the backbone output per sample. Defaults to
            1000, the value that used to be hard-coded.

    ``forward`` returns per-label probabilities in ``[0, 1]`` with shape
    ``(batch, num_classes)``.
    """
    def __init__(self, pretrained, num_classes=8, in_features=1000):
        super().__init__()
        # Attribute names are kept so previously saved state_dicts still load.
        self.pretrained = pretrained
        self.FC = nn.Linear(in_features, num_classes)

    def forward(self, x):
        features = self.pretrained(x)

        # A linear layer is added on top of the backbone output. The task is
        # multi-label, so every output gets its own sigmoid instead of a
        # softmax over all outputs.
        return torch.sigmoid(self.FC(features))

In [9]:
# def save(state, SAVE_DIR, epoch, model, optimizer): 
#     with open(SAVE_DIR + state +".pt", "wb") as f:
#         torch.save({
#             'epoch': epoch,
#             'model_state_dict': model.state_dict(),
#             'optimizer_state_dict': optimizer.state_dict()},
#             f)

In [21]:
# Root of the dataset; the annotation JSON lives under `annotations/`.
base_path = '/data/dmc/2_coco/'
annotation_path = base_path + 'annotations/instances_default.json'

# Parse the whole annotation file into `data`.
with open(annotation_path, 'r') as annotation_file:
    data = json.load(annotation_file)
# namelist = os.listdir(base_path)
# labels = pd.read_csv(base_path + "dirty_mnist_2nd_answer.csv").to_numpy()[:, 1:]


In [27]:
def _png_names(label_dir):
    """Return the sorted entries of ``label_dir`` with '.txt' replaced by '.png'."""
    return sorted(entry.replace('.txt', '.png') for entry in os.listdir(label_dir))


# Image file names of each split, derived from the entries of the split directories.
train_name = _png_names('/data/dmc/3_dota/clean_train')
val_name = _png_names('/data/dmc/3_dota/clean_val')
test_name = _png_names('/data/dmc/3_dota/clean_test')

In [44]:
# Collect the class entry of every training image and turn it into a
# multi-hot target matrix.
images_info = data['images']

# Index the annotation entries by file name once instead of scanning the whole
# list for every image. setdefault keeps the first entry for a duplicated
# name, which is what list.index() would have returned.
info_by_name = {}
for info in images_info:
    info_by_name.setdefault(info['file_name'], info)

# NOTE(review): each 'class' entry is assumed to be an iterable of labels, as
# MultiLabelBinarizer expects -- confirm against the annotation file.
# A file name missing from the annotations raises KeyError here.
train_cls = [info_by_name[name]['class'] for name in train_name]

# The binarizer is fitted on the training split; its column order defines the
# label layout of the targets.
ohe = MultiLabelBinarizer()
ohe_train_cls = ohe.fit_transform(train_cls)

In [48]:
# Collect the class entry of every validation image and encode it with the
# label layout of the training split.
images_info = data['images']

# Index the annotation entries by file name once instead of scanning the whole
# list for every image. setdefault keeps the first entry for a duplicated
# name, which is what list.index() would have returned.
info_by_name = {}
for info in images_info:
    info_by_name.setdefault(info['file_name'], info)

# A file name missing from the annotations raises KeyError here.
val_cls = [info_by_name[name]['class'] for name in val_name]

# Fit on the TRAINING labels (`train_cls` from the training-label cell) and
# only transform the validation labels. Fitting a separate binarizer on this
# split would change the number or order of columns whenever the split does
# not contain exactly the same set of classes as the training data.
# transform() ignores (with a warning) classes that were not seen in fit().
ohe = MultiLabelBinarizer().fit(train_cls)
ohe_val_cls = ohe.transform(val_cls)

In [50]:
# Collect the class entry of every test image and encode it with the label
# layout of the training split.
images_info = data['images']

# Index the annotation entries by file name once instead of scanning the whole
# list for every image. setdefault keeps the first entry for a duplicated
# name, which is what list.index() would have returned.
info_by_name = {}
for info in images_info:
    info_by_name.setdefault(info['file_name'], info)

# A file name missing from the annotations raises KeyError here.
test_cls = [info_by_name[name]['class'] for name in test_name]

# Fit on the TRAINING labels (`train_cls` from the training-label cell) and
# only transform the test labels. Fitting a separate binarizer on this split
# would change the number or order of columns whenever the split does not
# contain exactly the same set of classes as the training data.
# transform() ignores (with a warning) classes that were not seen in fit().
ohe = MultiLabelBinarizer().fit(train_cls)
ohe_test_cls = ohe.transform(test_cls)

In [52]:
# Number of samples per mini-batch for the data loaders.
BATCH_SIZE = 16

# Checkpoint directory; created up front so that it exists before training
# (exist_ok=True makes re-running this cell harmless).
save_dir = '/data/dmc/cls_ckpt'
os.makedirs(save_dir, exist_ok=True)

In [55]:
# Per-channel mean and standard deviation over every PNG image in `data_dir`.
# The statistics are expressed in raw pixel units (0-255), i.e. before any
# scaling to [0, 1].
data_dir = '/data/dmc/2_coco/images'
list_dir = os.listdir(data_dir)
num_files = len(list_dir)

img_sum = np.zeros(3)      # running per-channel sum of pixel values
img_sq_sum = np.zeros(3)   # running per-channel sum of squared pixel values
num_pixels = 0.            # pixels seen PER CHANNEL

for i, file in enumerate(list_dir):
    file_path = os.path.join(data_dir, file)
    if os.path.isfile(file_path) and file_path.endswith('.png'):
        # Decode once, and force 3 channels so the sums always match the
        # shape-(3,) accumulators (RGBA / grayscale files would not).
        with Image.open(file_path) as img:
            pixels = np.asarray(img.convert('RGB'), dtype=float)
        height, width = pixels.shape[:2]

        # The sums above are per channel, so the matching denominator is
        # width * height. Counting 3 * width * height would shrink the mean
        # by a factor of three and distort the std.
        num_pixels += width * height
        img_sum += pixels.sum(axis=(0, 1))
        img_sq_sum += (pixels ** 2).sum(axis=(0, 1))

    if i % 500 == 0:
        print(f'{i}/{num_files}')

if num_pixels == 0:
    # Fail loudly instead of silently producing NaN statistics.
    raise ValueError(f'no .png images found in {data_dir}')

mean = img_sum / num_pixels
# Var[x] = E[x^2] - E[x]^2; clamp at 0 to absorb floating-point round-off.
std = np.sqrt(np.maximum(img_sq_sum / num_pixels - mean ** 2, 0.0))
mean,std

0/3549
500/3549
1000/3549
1500/3549
2000/3549
2500/3549
3000/3549
3500/3549


In [60]:
# T.ToTensor() converts a PIL image to a float tensor scaled to [0, 1], so the
# statistics given to T.Normalize must be on that scale as well. `mean` and
# `std` come from the channel-statistics cell and are in raw 0-255 pixel
# units, hence the division by 255. (Passing 0-255 statistics directly would
# squash every input into a tiny range around zero.)
norm_mean = tuple(float(m) / 255.0 for m in mean)
norm_std = tuple(float(s) / 255.0 for s in std)

# Training pipeline. The commented-out entries are augmentations that are
# currently disabled.
transformer = T.Compose([
#     T.RandomCrop(128,128),
#     T.RandomRotation(2.8),
#     T.RandomHorizontalFlip(),
#     T.CenterCrop(10),
#     T.RandomVerticalFlip(),
    T.ToTensor(),
    T.Normalize(norm_mean, norm_std),
#     T.RandomRotation(60, expand=False),
#     T.RandomAffine(30)
    #AddGaussianNoise(0., 1.)
])

# Evaluation pipeline: tensor conversion and normalisation only.
test_transforms = T.Compose([
    T.ToTensor(),
    T.Normalize(norm_mean, norm_std),
])

In [65]:
# Both splits read their images from the same directory.
image_dir = base_path + 'images/'

train_dataset = CustomDataset(image_dir, train_name, ohe_train_cls, transformer)
# Validation uses the evaluation pipeline so that augmentations enabled in
# `transformer` never leak into the validation metrics.
val_dataset = CustomDataset(image_dir, val_name, ohe_val_cls, test_transforms)

In [71]:
# Training batches are reshuffled every epoch.
train_dataloader = D.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
# Validation only measures the model, so a fixed order is used: shuffling adds
# nothing and makes per-batch results harder to compare between epochs.
val_dataloader = D.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

In [85]:
# Create the 'convnext_base' backbone through timm with pretrained weights and
# wrap it in My_model, which adds the linear head and the sigmoid.
# NOTE(review): the recorded output of this cell shows the weight download
# failing with a RuntimeError. When that happens `model` is not rebuilt, and
# later cells keep using whatever `model` already exists in the kernel --
# confirm this cell succeeds before training.
pretrained = timm.create_model('convnext_base',pretrained=True)
model = My_model(pretrained)

Downloading: "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth" to /root/.cache/torch/checkpoints/convnext_base_1k_224_ema.pth


RuntimeError: Only one file(not dir) is allowed in the zipfile

In [74]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU so
# the notebook still runs on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [75]:
# Move the model's parameters and buffers to `device`. As the last expression
# of the cell, the returned module is displayed as its layer-by-layer repr.
model.to(device)

My_model(
  (conv2d): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (pretrained): ConvNeXt(
    (stem): Sequential(
      (0): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
    )
    (stages): Sequential(
      (0): ConvNeXtStage(
        (downsample): Identity()
        (blocks): Sequential(
          (0): ConvNeXtBlock(
            (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
            (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
            (mlp): Mlp(
              (fc1): Linear(in_features=128, out_features=512, bias=True)
              (act): GELU()
              (drop1): Dropout(p=0.0, inplace=False)
              (fc2): Linear(in_features=512, out_features=128, bias=True)
              (drop2): Dropout(p=0.0, inplace=False)
            )
            (drop_path): Identity()
          )
          (1): ConvNeXtBlock(
            (conv_dw): Con

In [81]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# T_max is the number of scheduler steps over which the learning rate is
# annealed from its initial value down to eta_min. The scheduler is stepped
# once per epoch and training runs for 50 epochs, so T_max=50 gives a single
# cosine decay over the whole run. (The previous value 0.1 is not a valid
# step count.)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=0.0001)
# The model already applies a sigmoid, so BCELoss (which expects
# probabilities) is the matching criterion.
criterion = torch.nn.BCELoss()

In [78]:
from tqdm import tqdm

In [82]:
EPOCH = 50
THRESHOLD = 0.75  # a label counts as predicted when its probability exceeds this
total_step = len(train_dataloader)
best_val_acc = 0


def _count_correct(probs, labels, threshold=THRESHOLD):
    """Return (correctly predicted label slots, total label slots) for a batch."""
    preds = probs > threshold
    return (preds == labels.bool()).sum().item(), labels.numel()


for epoch in range(EPOCH):
    # ---- training ----
    model.train()
    running_loss = 0.0
    train_correct = 0
    train_total = 0
    step = 0
    for step, (images, labels) in enumerate(train_dataloader, start=1):
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32)

        optimizer.zero_grad()

        probs = model(images)
        loss = criterion(probs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Accumulate raw counts instead of averaging per-batch accuracies, so
        # a smaller final batch is not over-weighted.
        n_correct, n_total = _count_correct(probs.detach(), labels)
        train_correct += n_correct
        train_total += n_total

    train_acc = train_correct / max(train_total, 1)
    print(f'Epoch [{epoch+1}/{EPOCH}], Step [{step}/{total_step}], Loss: {running_loss/max(total_step, 1)}, Acc {train_acc}')

    # ---- validation ----
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_dataloader:
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32)

            probs = model(images)
            val_loss_sum += criterion(probs, labels).item()

            n_correct, n_total = _count_correct(probs, labels)
            val_correct += n_correct
            val_total += n_total

    val_acc = val_correct / max(val_total, 1)
    val_loss = val_loss_sum / max(len(val_dataloader), 1)
    print(f'Validation loss: {val_loss}')
    print(f'Validation acc: {val_acc}')

    # The scheduler is stepped once per epoch.
    lr_scheduler.step()

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Name the checkpoint after the (1-based) epoch, not the last batch
        # index, and store it in the checkpoint directory created earlier.
        ckpt_path = os.path.join(save_dir, f'model_weights_epoch{epoch + 1}.pth')
        torch.save(model.state_dict(), ckpt_path)

RuntimeError: Given groups=1, weight of size [3, 1, 3, 3], expected input[16, 3, 480, 640] to have 1 channels, but got 3 channels instead