In [1]:
%matplotlib inline

import time,os
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from PIL import Image

import sys
sys.path.append("..") 

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load data

In [2]:
# setting dataset path
root_dir  = os.path.join("data", "split")
path_dict = {
    'train': os.path.join(root_dir, "train"),
    "valid": os.path.join(root_dir, "valid"),
    "test":  os.path.join(root_dir, "test")
}


# setting transform
transform_dict = {
    'train': torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.Resize(72),
    torchvision.transforms.RandomResizedCrop(64,(0.5, 1.0), ratio=(0.75, 1.3333333333333333)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
    "valid": torchvision.transforms.Compose([
    torchvision.transforms.Resize(72),
    torchvision.transforms.CenterCrop(64),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
    "test": torchvision.transforms.Compose([
    torchvision.transforms.Resize(72),
    torchvision.transforms.CenterCrop(64),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])}

train_set = torchvision.datasets.ImageFolder(path_dict['train'],transform_dict['train'])
valid_set = torchvision.datasets.ImageFolder(path_dict['valid'],transform_dict['valid'])
test_set  = torchvision.datasets.ImageFolder(path_dict['test'], transform_dict['test'])

print(valid_set.class_to_idx)
print(train_set.class_to_idx)
print(test_set.class_to_idx)

{'fish_01': 0, 'fish_02': 1, 'fish_03': 2, 'fish_04': 3, 'fish_05': 4}
{'fish_01': 0, 'fish_02': 1, 'fish_03': 2, 'fish_04': 3, 'fish_05': 4}
{'fish_01': 0, 'fish_02': 1, 'fish_03': 2, 'fish_04': 3, 'fish_05': 4}


In [3]:
# Hyper_para
BATCH_SIZE = 512

# setting loader
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)   
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)   
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)   

# CNN from scratch

## define network

In [4]:
# considering the num of train set is limited, we adopt a rather small CNN-network:modified lenet
class CNN_LeNet(nn.Module):
    def __init__(self):
        super(CNN_LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 6, 5, 1, 2), # in_channels, out_channels, kernel_size
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5, 2, 2),
            nn.ReLU(),
            nn.Conv2d(16, 32, 5, 2, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(32*4*4, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 5)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(feature.size()[0], -1))
        return output

In [5]:
my_LeNet = CNN_LeNet()
my_LeNet.to(device)

CNN_LeNet(
  (conv): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (4): ReLU()
    (5): Conv2d(16, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=5, bias=True)
  )
)

## train network

### train setting

In [6]:
# Hyper-para
EPOCH = 20
LR_LeNet = 0.001

# print setting
print_freq = 10
drop_lr_after_epoch_num = 2


# training setting
optimizer_LeNet = optim.AdamW(my_LeNet.parameters(), LR_LeNet)
criterion_LeNet = nn.CrossEntropyLoss()

### train

In [7]:
# training
print("Start Training, LeNet!") 
for epoch in range(EPOCH):
    if epoch % drop_lr_after_epoch_num == (drop_lr_after_epoch_num - 1):
            LR_LeNet = LR_LeNet * 0.1
            print('Change learning rate to : %.09f at epoch %d' % (LR_LeNet, epoch + 1))


    sum_loss = 0.0
    # read data
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # clean grad
        optimizer_LeNet.zero_grad()

        # forward + backward
        outputs = my_LeNet(inputs)
        loss = criterion_LeNet(outputs, labels)
        loss.backward()
        optimizer_LeNet.step()

        # print everage loss every 10 batch
        sum_loss += loss.item()
        if i % print_freq == (print_freq - 1):
            print('[%d, %d] loss: %.03f'
                  % (epoch + 1, i + 1, sum_loss / print_freq))
            sum_loss = 0.0
    # val upon every epoch

    with torch.no_grad():
        correct = 0
        total = 0
        for data in valid_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = my_LeNet(images)
            # find index
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
        print('accuracy on valid set on epoch %d ：%.03f%%' % (epoch + 1, (100.0 * correct / total)))

torch.save(my_LeNet.state_dict(), 'my_LeNet_%03d.pth' % (epoch + 1))
print("Training Finished, TotalEPOCH=%d" % EPOCH)

Start Training, LeNet!
[1, 10] loss: 1.469
[1, 20] loss: 1.221
[1, 30] loss: 1.020
[1, 40] loss: 0.855
accuracy on valid set on epoch 1 ：64.303%
Change learning rate to : 0.000100000 at epoch 2
[2, 10] loss: 0.836
[2, 20] loss: 0.719
[2, 30] loss: 0.630
[2, 40] loss: 0.534
accuracy on valid set on epoch 2 ：81.771%
[3, 10] loss: 0.537
[3, 20] loss: 0.469
[3, 30] loss: 0.440
[3, 40] loss: 0.410
accuracy on valid set on epoch 3 ：87.780%
Change learning rate to : 0.000010000 at epoch 4
[4, 10] loss: 0.393
[4, 20] loss: 0.376
[4, 30] loss: 0.358
[4, 40] loss: 0.336
accuracy on valid set on epoch 4 ：89.583%
[5, 10] loss: 0.337
[5, 20] loss: 0.339
[5, 30] loss: 0.322
[5, 40] loss: 0.299
accuracy on valid set on epoch 5 ：87.500%
Change learning rate to : 0.000001000 at epoch 6
[6, 10] loss: 0.334
[6, 20] loss: 0.300
[6, 30] loss: 0.289
[6, 40] loss: 0.262
accuracy on valid set on epoch 6 ：90.465%
[7, 10] loss: 0.271
[7, 20] loss: 0.264
[7, 30] loss: 0.263
[7, 40] loss: 0.248
accuracy on valid 

## test model

In [8]:
# final test
with torch.no_grad():
    correct = 0
    total = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = my_LeNet(images)
        # find index
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print('accuracy of my_LeNet on final test set ：%.03f%%' % (100.0 * correct / total))

accuracy of my_LeNet on final test set ：91.640%


# CNN from transfered learning

## load & modify pretrained network

In [15]:
# load pretrained network and paras
my_resnet18 = torchvision.models.resnet18(pretrained=True) 

# modify last fc layer to 5 class
fc_features = my_resnet18.fc.in_features 
my_resnet18.fc = nn.Linear(fc_features, 5)

## train network

### fix para

In [16]:
# move model to GPU if possible
my_resnet18.to(device)

# fix para & grad for pretrained layers
para_optim = []
for k in my_resnet18.children():
    if k == my_resnet18.fc:
        for param in k.parameters():
            para_optim.append(param)
    else:
        for param in k.parameters():
            param.requires_grad = False

### train setting

In [17]:
# Hyper-para
EPOCH = 20
LR_resnet = 0.001

# print setting
print_freq = 10
drop_lr_after_epoch_num = 2


# training setting
optimizer_resnet = optim.Adam(para_optim, LR_resnet)
criterion_resnet = nn.CrossEntropyLoss()

In [18]:
print_freq = 10
drop_lr_after_epoch_num = 4
print("Start Training, Resnet-18!") 

for epoch in range(EPOCH):
    if epoch % drop_lr_after_epoch_num == (drop_lr_after_epoch_num - 1):
            LR_resnet = LR_resnet * 0.1
            print('Change learning rate to : %.09f at epoch %d' % (LR_resnet, epoch + 1))

    # set to train mode for batch_norm layer
    my_resnet18.train()

    sum_loss = 0.0
    # read data
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # clean grad
        optimizer_resnet.zero_grad()

        # forward + backward
        outputs = my_resnet18(inputs)
        loss = criterion_resnet(outputs, labels)
        loss.backward()
        optimizer_resnet.step()

        # print everage loss every 10 batch
        sum_loss += loss.item()
        if i % print_freq == (print_freq - 1):
            print('[%d, %d] loss: %.03f'
                  % (epoch + 1, i + 1, sum_loss / print_freq))
            sum_loss = 0.0

    # val upon every epoch
    my_resnet18.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data in valid_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = my_resnet18(images)
            # find index
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
        print('accuracy on valid set on epoch %d ：%.03f%%' % (epoch + 1, (100.0 * correct / total)))

torch.save(my_resnet18.state_dict(), 'resnet18_%03d.pth' % (epoch + 1))
print("Training Finished, TotalEPOCH=%d" % EPOCH)

Start Training, Resnet-18!
[1, 10] loss: 1.554
[1, 20] loss: 1.084
[1, 30] loss: 0.822
[1, 40] loss: 0.689
accuracy on valid set on epoch 1 ：80.048%
[2, 10] loss: 0.604
[2, 20] loss: 0.535
[2, 30] loss: 0.489
[2, 40] loss: 0.456
accuracy on valid set on epoch 2 ：85.697%
[3, 10] loss: 0.443
[3, 20] loss: 0.420
[3, 30] loss: 0.418
[3, 40] loss: 0.423
accuracy on valid set on epoch 3 ：86.378%
Change learning rate to : 0.000100000 at epoch 4
[4, 10] loss: 0.390
[4, 20] loss: 0.374
[4, 30] loss: 0.367
[4, 40] loss: 0.413
accuracy on valid set on epoch 4 ：88.301%
[5, 10] loss: 0.368
[5, 20] loss: 0.345
[5, 30] loss: 0.336
[5, 40] loss: 0.333
accuracy on valid set on epoch 5 ：88.462%
[6, 10] loss: 0.332
[6, 20] loss: 0.325
[6, 30] loss: 0.333
[6, 40] loss: 0.377
accuracy on valid set on epoch 6 ：89.303%
[7, 10] loss: 0.340
[7, 20] loss: 0.317
[7, 30] loss: 0.323
[7, 40] loss: 0.301
accuracy on valid set on epoch 7 ：89.744%
Change learning rate to : 0.000010000 at epoch 8
[8, 10] loss: 0.320
[

In [19]:
# final test
my_resnet18.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = my_resnet18(images)
        # find index
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print('accuracy of my_resnet18 on final test set ：%.03f%%' % (100.0 * correct / total))

accuracy of my_resnet18 on final test set ：89.760%
