In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os

In [65]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset_path = os.path.join(os.path.curdir, 'dataset')
train_test_ratio = 0.8
batch_size = 32
num_epochs = 4

In [None]:
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class GoogleDriveDataset(Dataset):
    def __init__(self, url, transform = None):
        self.url = url
        self.transform = transform

        self.samples = []   # [(image_path: string, label: string)]
        self.class_name_to_id = {} # {class_name: string -> id: number}
        
        for class_id, class_name in enumerate(sorted(os.listdir(url))):
            self.class_name_to_id[class_name] = class_id

            class_path = os.path.join(url, class_name)
            for file_name in os.listdir(class_path):
                self.samples.append((
                    os.path.join(class_path, file_name),
                    class_name
                ))

    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        image_path, label = dataset.samples[idx]
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label
    
dataset = GoogleDriveDataset(
    dataset_path, 
    transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
    ])
)

train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [
        int(train_test_ratio * len(dataset)), 
        len(dataset) - int(train_test_ratio * len(dataset))
    ]
)

[41, 39]

In [25]:
class Cnn(nn.Module):
    def __init__(self, w, h, out):
        super(Cnn, self).__init__()
        self.conv1 = nn.Conv2d(3, 5, 5)
        self.maxPool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(5, 5, 5)
        l1w = (((w - self.conv1.kernel_size[0] + 1) // 2 - self.conv2.kernel_size[0] + 1) // 2)
        l1h = (((h - self.conv1.kernel_size[0] + 1) // 2 - self.conv2.kernel_size[0] + 1) // 2)
        print(f'conv out dim: {l1w} x {l1h}')
        self.l1 = nn.Linear(l1w * l1h * 5, 120)
        self.l2 = nn.Linear(self.l1.out_features, 80)
        self.l3 = nn.Linear(self.l2.out_features, out)
        nParams = 0
        for p in self.parameters():
            nParams += p.data.view(-1).size()[0]
        print(f'params count: {nParams}')
    
    def forward(self, x):
        x = self.maxPool(F.relu(self.conv1(x)))
        x = self.maxPool(F.relu(self.conv2(x)))
        x = x.view(-1, self.l1.in_features)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x

In [27]:
cnn = Cnn(128, 128, 5).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters())

conv out dim: 29 x 29
params count: 515815
