In [1]:
import torch
from torch import nn


def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, (3,3), padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels

    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

def vgg(conv_arch):
    conv_blks = []
    in_chanells = 1
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_chanells, out_channels))
        in_chanells = out_channels
    
    return nn.Sequential(
        *conv_blks,
        nn.Flatten(),
        nn.Linear(out_channels*7*7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10)
    )

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

In [12]:
import torchvision
from torchvision import transforms
import pandas as pd
import os
from PIL import Image
from torch.utils import data
from sklearn.utils import shuffle

In [13]:
def load_train_csv():
    df = pd.read_csv('../data/classify-leaves/train.csv')
    list_type = df['label'].unique()
    dic = {}
    for i,t in enumerate(list_type):
        dic[t] = i
    df['num_label'] = df['label'].map(dic)
    return shuffle(df)

class Dataset(object):
    def __init__(self, data_frame, root_dir, transform=None) -> None:
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)
    
    def __getitem__(self, idx):
        imgpath = os.path.join(self.root_dir, self.data_frame.iloc[idx,0])
        label = self.data_frame.iloc[idx,2]
        img = Image.open(imgpath).convert('RGB')
        if self.transform:
            img = self.transform(img)

        return img, label

def ac(data_iter, net, device):
    num_acs = []
    for x, y in data_iter:
        x = x.to(device)
        y = y.to(device)
        y_hat = net.predict(x)
        maxs, indexs = torch.max(y_hat, dim=1)
        num_acs.append(y.eq(indexs).sum()/indexs.shape[0])
    return sum(num_acs)/len(num_acs)

data_csv = load_train_csv()
train_csv = data_csv.iloc[:15000,:]
test_csv = data_csv.iloc[15000:,:]
trans = transforms.Compose([transforms.ToTensor()])
train_dataset = Dataset(train_csv, '../data/classify-leaves', trans)
test_dataset = Dataset(test_csv, '../data/classify-leaves', trans)

In [14]:
batch_size = 24
num_epochs = 10
lr = 0.1
train_iter = data.DataLoader(train_dataset, batch_size=batch_size, num_workers=4, pin_memory=True)
test_iter = data.DataLoader(test_csv, batch_size=batch_size, num_workers=4, pin_memory=True)

net = vgg(conv_arch)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)

In [15]:
for i in range(num_epochs):
    for x,y in train_iter:
        x = x.to(device)
        y = y.to(device)
        y_hat = net(x)
        l = loss(y_hat, y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    print(l)
    print(ac(test_iter, net, device))

In [9]:
import torch
from torch import nn



class Net(nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.layer2 = nn.Sequential(
            nn.Conv2d(3,60,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
            nn.Conv2d(60,512,5,1,0),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(512,1024,5,1,0),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Flatten(),
            nn.Linear(1024, 176)
        )
    def forward(self, x):
        h1 = self.layer1(x)
        h2 = self.layer2(h1)
        return h2

net = Net()

In [10]:
from torchinfo import summary

summary(net, (1,3,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [1, 1024, 1, 1]           180,400
├─Sequential: 1-1                        [1, 60, 16, 16]           --
│    └─Conv2d: 2-1                       [1, 60, 32, 32]           1,680
│    └─ReLU: 2-2                         [1, 60, 32, 32]           --
│    └─MaxPool2d: 2-3                    [1, 60, 16, 16]           --
├─Sequential: 1-2                        [1, 1024, 1, 1]           --
│    └─Conv2d: 2-4                       [1, 512, 12, 12]          768,512
│    └─ReLU: 2-5                         [1, 512, 12, 12]          --
│    └─MaxPool2d: 2-6                    [1, 512, 6, 6]            --
│    └─Conv2d: 2-7                       [1, 1024, 2, 2]           13,108,224
│    └─ReLU: 2-8                         [1, 1024, 2, 2]           --
│    └─MaxPool2d: 2-9                    [1, 1024, 1, 1]           --
Total params: 14,058,816
Trainable params: 14,058,816
Non-traina