In [1]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import cv2
import os
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import optim
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision import transforms


In [2]:
class DatasetPlus(Dataset):
    """Image-classification dataset: a folder of JPEG files plus a CSV of labels.

    Every file in ``root_img`` whose name ends in ``.jpg`` is one sample
    (sorted by file name). The numeric image ID is parsed from the file name
    by dropping its first six characters and the four-character extension,
    then looked up in the ``ID`` column of the CSV; the ``Label`` value of the
    first matching row is the target. A label of 7 is remapped to 0.

    Args:
        root_img: Directory containing the ``.jpg`` images.
        root_data: Path to the CSV file; it must provide ``ID`` and ``Label``
            columns.
        width: Width, in pixels, every image is resized to.
        hight: Height, in pixels, every image is resized to (the spelling is
            kept because it is part of the public signature).
        transform: Optional callable applied to the resized float32 image
            (an ``height x width x 3`` array divided by 255).
    """

    def __init__(self, root_img, root_data, width, hight, transform=None):
        self.root_img = root_img
        self.root_data = root_data
        self.width = width
        self.hight = hight
        self.transform = transform
        # Labels are stored in a CSV file.
        self.labels = pd.read_csv(self.root_data)
        # Sorting makes the index -> file mapping independent of the order
        # in which the OS lists the directory.
        self.imgs = [name for name in sorted(os.listdir(self.root_img))
                     if name.endswith('.jpg')]
        self.len = len(self.imgs)

    def __len__(self):
        """Return the number of ``.jpg`` files found in ``root_img``."""
        return self.len

    def __getitem__(self, idx):
        """Load, preprocess and label the sample at position ``idx``.

        Returns:
            tuple: ``(img, label)`` where ``img`` is the preprocessed image
            (after ``transform`` when one was given) and ``label`` is a
            scalar ``torch.long`` tensor.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
            KeyError: If the CSV has no usable ``Label`` for the image ID.
        """
        img_name = self.imgs[idx]
        img_path = os.path.join(self.root_img, img_name)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is ``dst``, so a positional flag
        # would be ignored and the default interpolation used instead.
        img = cv2.resize(img, (self.width, self.hight),
                         interpolation=cv2.INTER_AREA)
        img = img / 255.0

        if self.transform is not None:
            img = self.transform(img)

        # File names are expected to look like '<6-char prefix><id>.jpg'.
        img_id = int(img_name[6:-4])
        matches = self.labels.loc[self.labels['ID'] == img_id, 'Label'].dropna()
        if matches.empty:
            raise KeyError(
                f'No label for image ID {img_id} ({img_name}) in {self.root_data}')
        label = matches.iloc[0]
        if label == 7:
            # NOTE(review): class 7 is folded into class 0; the reason is not
            # visible in this notebook -- confirm against the label schema.
            label = 0

        return img, torch.tensor(int(label), dtype=torch.long)

In [3]:
class Net(nn.Module):
    """Small CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    Layer stack: ``Conv2d(3, 6, 5)`` -> ReLU -> ``MaxPool2d(2, 2)`` ->
    ``Conv2d(6, 16, 5)`` -> ReLU -> ``MaxPool2d(2, 2)`` -> flatten ->
    ``Linear(.., 120)`` -> ReLU -> ``Linear(120, 84)`` -> ReLU ->
    ``Linear(84, num_classes)``.

    Args:
        h: Height in pixels of the input images.
        w: Width in pixels of the input images.
        num_classes: Number of output logits. Defaults to 3.

    Raises:
        ValueError: If ``h`` or ``w`` is too small to leave at least one
            pixel after both conv/pool stages (anything below 16).
    """

    def __init__(self, h, w, num_classes=3):
        super().__init__()
        nw = self._pooled_size(w)
        nh = self._pooled_size(h)
        if nh < 1 or nw < 1:
            # Without this check a tiny input yields a zero or negative
            # feature count and a silently mis-sized first linear layer.
            raise ValueError(
                f'Input of size {h}x{w} is too small; each side must be at least 16.')
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * nh * nw, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _pooled_size(size):
        """Return a spatial extent after both conv(5)/pool(2) stages."""
        # An unpadded 5x5 convolution removes 4 pixels; a 2x2 pool halves.
        return (((size - 4) // 2) - 4) // 2

    def forward(self, x):
        """Map a batch of images ``(N, 3, h, w)`` to raw logits ``(N, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation here: the raw logits are returned.
        return self.fc3(x)

In [4]:
# Locations of the image folder and of the CSV that holds the labels.
root_img = 'data/images/'
root_label = 'data/metadata/PSL_dataset.csv'

# The only preprocessing step is the conversion of each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Every image is resized to 224x224 by the dataset before the transform runs.
ds = DatasetPlus(
    root_img=root_img,
    root_data=root_label,
    width=224,
    hight=224,
    transform=transform,
)

In [5]:
# Seed PyTorch's global RNG before anything random happens, so that the weight
# initialisation and the DataLoader's shuffling order are repeatable after a
# kernel restart.
SEED = 42
torch.manual_seed(SEED)

# The network is sized for the 224x224 images produced by the dataset.
model = Net(224, 224)

trainloader = DataLoader(ds, batch_size=4, shuffle=True)

# CrossEntropyLoss takes the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# NOTE(review): 1e-5 is a very small step size for plain SGD without
# momentum -- confirm this learning rate is intended.
optimizer = optim.SGD(model.parameters(), lr=1e-5)

In [6]:
def train_model(epochs, log_every=1):
    """Train the notebook-level ``model`` on ``trainloader``.

    Uses the module-level ``model``, ``trainloader``, ``criterion`` and
    ``optimizer`` objects, so those cells must have been run first.

    Args:
        epochs: Number of full passes over ``trainloader``.
        log_every: Print the mean loss once every ``log_every`` batches.
            The default of 1 prints the loss of every batch. A trailing
            window shorter than ``log_every`` at the end of an epoch is not
            reported.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be at least 1')

    model.train()
    for epoch in range(epochs):
        window_loss = 0.0   # sum of the losses since the last report
        window_size = 0     # number of batches since the last report
        for i, (img, label) in enumerate(trainloader):
            # Gradients accumulate by default, so clear them every step.
            optimizer.zero_grad()
            loss = criterion(model(img), label)
            loss.backward()
            optimizer.step()

            window_loss += loss.item()
            window_size += 1
            if window_size == log_every:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {window_loss / window_size:.3f}')
                window_loss = 0.0
                window_size = 0
        

In [13]:
# Run the training loop for 2000 epochs; losses are printed as it runs.
train_model(epochs=2000)

[1,     1] loss: 1.084
[1,     2] loss: 1.103
[1,     3] loss: 1.121
[1,     4] loss: 1.093
