In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder

from PIL import Image

In [73]:
batch = 64
learning_rate = 0.0002
epochs = 10

In [79]:
class custom_dataset(Dataset):

    label_encoder = LabelEncoder()

    def __init__(self, csv, root):
        self.df = pd.read_csv(f"{csv}")
        self.root = root
        self.df['class'] = self.label_encoder.fit_transform(self.df['class'])

        width = round(self.df["width"].mean())
        height = round(self.df["height"].mean())
        self.transform = transforms.Compose([
            transforms.Resize((width, height)),  # 이미지 크기 조정
            transforms.RandomHorizontalFlip(p=0.05),
            transforms.ToTensor()  # 텐서로 변환
            ])
    
    def __len__(self):
        return len(self.df)
    
    def len_labels(self):
        return len(self.label_encoder.classes_)
    
    def get_size(self):
        width = round(self.df["width"].mean())
        height = round(self.df["height"].mean())
        return width, height

    def encoder(self):
        return self.label_encoder
    
    def __getitem__(self, idx):
        image = Image.open(f"{self.df.iloc[idx]["path"]}").convert('RGB') 
        
        if self.transform:
            image = self.transform(image)

        wh = self.df.iloc[idx]["w/h"].astype(np.float32)
        bha = self.df.iloc[idx]["bha"].astype(np.float32)
        label = self.df.iloc[idx]["class"].astype(np.int32)

        item = {
                'image':image,
                'wh':wh,
                'bha':bha,
                'label':label
                }
        
        return item

In [80]:
dataset = custom_dataset(csv = "data/train.csv", root = "")
dataloader = DataLoader(dataset, batch_size=batch, shuffle=True)
for i in dataloader: 
    print(i)
    break

{'image': tensor([[[[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          ...,
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],

         [[0.9961, 0.9961, 0.9961,  ..., 1.0000, 1.0000, 1.0000],
          [0.9961, 0.9961, 0.9961,  ..., 1.0000, 1.0000, 1.0000],
          [0.9961, 0.9961, 0.9961,  ..., 1.0000, 1.0000, 1.0000],
          ...,
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],

         [[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.000

In [110]:
class CNN(nn.Module):
    def __init__(self, width, height, classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.fc1 = nn.Linear(32*width*height + 2, 64)
        self.fc2 = nn.Linear(64, classes)

    def forward(self, image, wh, bha):
        x = self.conv1(image)
        x = x.view(x.size(0), -1)
        wh = wh.view(wh.size(0), -1)  # [batch_size, 1]로 변형
        bha = bha.view(bha.size(0), -1)
        print(wh)
        x = torch.cat((x, wh, bha), dim = 1)
        x = self.fc1(x)
        x = self.fc2(x)

        return x

In [111]:
num_classes = dataset.len_labels()
model = CNN(width = dataset.get_size()[0], height = dataset.get_size()[1],classes = num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [112]:
for epoch in range(epochs):
    for batch in dataloader:
        images = batch['image']
        wh = batch['wh']
        bha = batch["bha"]
        labels = batch['label']
        
        optimizer.zero_grad()
        outputs = model(images, wh, bha).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

torch.Size([128])


RuntimeError: Tensors must have same number of dimensions: got 2 and 1