In [None]:
!pip install torch_optimizer

import numpy as np
import random
import math
import time
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from torch.optim import Optimizer
import torch_optimizer as optim

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from torchvision import datasets
from torchvision.transforms import transforms

import torch.nn.functional as F


# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Only query the GPU name when CUDA is actually present; get_device_name()
# raises on CPU-only hosts, which would defeat the CPU fallback above.
if torch.cuda.is_available():
    name = torch.cuda.get_device_name(0)
    print("GPU: " + name)
else:
    name = None
    print("GPU: not available, running on CPU")

# Seed every random number generator used by the notebook with the same value.
random_seed = 0

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

np.random.seed(random_seed)
random.seed(random_seed)


class EarlyStopping:
    """Track the validation loss across epochs and signal when to stop training.

    Call the instance once per epoch with that epoch's validation loss and the
    model. Whenever the loss is not worse than the best value seen so far, the
    model's ``state_dict`` is saved to ``path`` and the patience counter is
    reset. Otherwise the counter is incremented, and once it reaches
    ``patience`` the ``early_stop`` flag is set.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` becomes ``True``.
        path: File the best ``state_dict`` is written to with ``torch.save``.
    """

    def __init__(self, patience=10, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0             # consecutive calls without improvement
        self.best_score = None       # lowest validation loss seen so far
        # Use np.inf: the capitalised alias np.Inf was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.early_stop = False
        self.path = path

    def __call__(self, val_loss, model):
        """Record ``val_loss`` and checkpoint ``model`` when it did not get worse.

        Args:
            val_loss: Validation loss of the epoch that just finished.
            model: Object exposing ``state_dict()``; saved on improvement.
        """
        score = val_loss

        if self.best_score is None:
            # First call: any loss counts as the best so far.
            self.best_score = score

            print('Validation loss decreased ({:.4f} --> {:.4f}).  Saving model ...'.format(self.val_loss_min, val_loss))
            torch.save(model.state_dict(), self.path)
            self.val_loss_min = val_loss

        elif score > self.best_score:
            # Strictly worse than the best loss: spend one unit of patience.
            self.counter += 1
            print("EarlyStopping counter: {} out of {}".format(self.counter, self.patience))

            if self.counter >= self.patience:
                self.early_stop = True

        else:
            # Equal or better: new best, reset patience and overwrite the checkpoint.
            self.best_score = score
            self.counter = 0

            print('Validation loss decreased --- Saving model ...')
            torch.save(model.state_dict(), self.path)
            self.val_loss_min = val_loss



def get_data_len_index(pad=4, randomcrop=32):
    """Build the augmented CIFAR-10 training set and describe it for splitting.

    The dataset is created under ``./cifar_10data/`` with ``download=True`` and
    a transform pipeline of pad -> random horizontal flip -> random crop ->
    tensor conversion.

    Args:
        pad: Padding passed to ``transforms.Pad``.
        randomcrop: Crop size passed to ``transforms.RandomCrop``.

    Returns:
        A three-element list ``[len_train, index_train, train_dataset]`` where
        ``index_train`` is ``list(range(len_train))``.
    """
    augmentation = transforms.Compose([
        transforms.Pad(pad),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(randomcrop),
        transforms.ToTensor(),
    ])

    cifar_train = datasets.CIFAR10(
        root='./cifar_10data/',
        train=True,
        transform=augmentation,
        download=True,
    )

    n_samples = len(cifar_train)
    sample_indices = list(range(n_samples))

    return [n_samples, sample_indices, cifar_train]



def test_model(model, batch_size=128):
    model.eval()
    test_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for images, labels in test_loader :
            images, labels = images.to(device), labels.to(device)

            output = model(images)
            test_loss += loss_function(output, labels).item()

            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(labels.view_as(pred)).sum().item()

            total += labels.size(0)

    print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss /total, correct, total,
            100. * correct / total))
    

def train_model(model, batch_size, n_epochs, patience, loader):
    """Train ``model`` with early stopping and return it with its best weights.

    Relies on the module-level ``optimizer``, ``loss_function`` and ``device``
    being defined before the call.

    Args:
        model: Network to train.
        batch_size: Unused here; the batch size is fixed by the loaders.
        n_epochs: Maximum number of epochs to run.
        patience: Passed to ``EarlyStopping``.
        loader: Sequence whose element 0 is the training loader and element 1
            the validation loader.

    Returns:
        ``(model, avg_train_losses, avg_valid_losses)`` where the two lists
        hold one mean batch loss per completed epoch and ``model`` has been
        reloaded from the checkpoint written by ``EarlyStopping``.
    """
    train_loader = loader[0]
    valid_loader = loader[1]

    avg_train_losses = []
    avg_valid_losses = []

    early_stopping = EarlyStopping(patience=patience)

    for epoch in range(1, n_epochs + 1):
        print("{}th Epoch starting.".format(epoch))

        # Switch back to training mode every epoch: the validation pass below
        # puts the model in eval mode, and without this call every epoch after
        # the first would train with dropout disabled and BatchNorm frozen.
        model.train()
        train_losses = []

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            output = model(images)
            train_loss = loss_function(output, labels)
            train_loss.backward()
            optimizer.step()

            train_losses.append(train_loss.item())

        model.eval()
        valid_losses = []

        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)

                output = model(images)
                valid_loss = loss_function(output, labels)

                valid_losses.append(valid_loss.item())

        # Per-epoch means of the per-batch losses.
        loss_train = np.average(train_losses)
        loss_valid = np.average(valid_losses)

        avg_train_losses.append(loss_train)
        avg_valid_losses.append(loss_valid)

        print("Epoch [{}] Train Loss: {:.4f} & Validation Loss: {:.4f}".format(epoch, loss_train, loss_valid))

        early_stopping(loss_valid, model)

        if early_stopping.early_stop:
            print("Early Stopping!!")
            break

    # Reload from the path EarlyStopping actually wrote to, rather than a
    # hard-coded file name that could drift from it.
    model.load_state_dict(torch.load(early_stopping.path))

    return model, avg_train_losses, avg_valid_losses



def train_KFold(model, batch_size, n_epochs, patience, data_info, fold):
    """Run K-fold training, one ``train_model`` call per fold.

    The index list is shuffled once, cut into ``fold`` validation slices of
    ``len_train // fold`` samples, and for each slice the remaining indices
    (including any remainder left by the integer division) form the training
    set.

    Note that the same ``model`` object is carried from one fold to the next;
    it is not re-initialised between folds.

    Args:
        model: Network to train.
        batch_size: Batch size for both loaders of every fold.
        n_epochs: Maximum number of epochs per fold.
        patience: Early-stopping patience forwarded to ``train_model``.
        data_info: ``[len_train, index_train, train_dataset]`` as returned by
            ``get_data_len_index``.
        fold: Number of folds.

    Returns:
        ``(model, train_losses, valid_losses)`` where each loss list holds one
        per-epoch loss list per fold.
    """
    train_losses = []
    valid_losses = []

    len_train = data_info[0]
    # Shuffle a private copy so the caller's index list is not reordered.
    index_train = list(data_info[1])
    train_dataset = data_info[2]

    np.random.shuffle(index_train)

    split_size = len_train // fold

    start = time.time()
    for i in range(fold):
        lo, hi = split_size * i, split_size * (i + 1)

        valid_ind = index_train[lo:hi]
        # Everything outside the validation slice; this one expression covers
        # the first, middle and last fold alike.
        train_ind = index_train[:lo] + index_train[hi:]

        train_sampler = SubsetRandomSampler(train_ind)
        valid_sampler = SubsetRandomSampler(valid_ind)

        train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                   batch_size=batch_size,
                                                   sampler=train_sampler,
                                                   num_workers=0)

        valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                   batch_size=batch_size,
                                                   sampler=valid_sampler,
                                                   num_workers=0)

        # Build a fresh pair per fold. Appending to one shared list left the
        # first fold's loaders at positions 0 and 1, so every fold silently
        # trained and validated on fold 1's split.
        loader = [train_loader, valid_loader]

        print("{} Fold is Training".format(i+1))

        model, avg_train_losses, avg_valid_losses = train_model(model, batch_size, n_epochs, patience, loader)

        train_losses.append(avg_train_losses)
        valid_losses.append(avg_valid_losses)

    end = time.time()

    # Total wall-clock seconds spent across all folds.
    print(end-start)

    return model, train_losses, valid_losses


Collecting torch_optimizer
[?25l  Downloading https://files.pythonhosted.org/packages/ce/70/ca0cd259662eef5c9448d3ecf14af880bbfe76331e4eeab7b19827d6dbe6/torch_optimizer-0.0.1a17-py3-none-any.whl (69kB)
[K     |████▊                           | 10kB 23.2MB/s eta 0:00:01[K     |█████████▌                      | 20kB 17.3MB/s eta 0:00:01[K     |██████████████▏                 | 30kB 15.3MB/s eta 0:00:01[K     |███████████████████             | 40kB 14.8MB/s eta 0:00:01[K     |███████████████████████▋        | 51kB 11.6MB/s eta 0:00:01[K     |████████████████████████████▍   | 61kB 11.7MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 6.1MB/s 
Collecting pytorch-ranger>=0.1.1
  Downloading https://files.pythonhosted.org/packages/0d/70/12256257d861bbc3e176130d25be1de085ce7a9e60594064888a950f2154/pytorch_ranger-0.1.1-py3-none-any.whl
Installing collected packages: pytorch-ranger, torch-optimizer
Successfully installed pytorch-ranger-0.1.1 torch-optimizer-0.0.1a17
c

In [None]:
class VGG13_32(nn.Module) :
    """VGG-style CNN classifier with a 4096 -> 1024 second FC layer.

    Four convolutional stages (2, 2, 3 and 3 Conv-BatchNorm-ReLU units, each
    stage ending in a 2x2 max-pool) are followed by three fully connected
    layers. The shape comments below assume a 3 x 32 x 32 input, for which the
    convolutional part produces 512 x 2 x 2 features.

    Args:
        num_classes: Size of the final output layer (defaults to 10).
    """

    def __init__(self, num_classes=10) :
        super(VGG13_32, self).__init__()

        self.conv_layer1 = nn.Sequential(
                nn.BatchNorm2d(3),                              # Normalize the input
                nn.Conv2d(3, 64, kernel_size=3, padding=1),     # 64 * 32 * 32
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(64, 64, kernel_size=3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),                                      # 64 * 32 * 32
                nn.MaxPool2d(kernel_size=2, stride=2)           # 64 * 16 * 16
                )
        self.conv_layer2 = nn.Sequential(
                nn.Conv2d(64, 128, kernel_size=3, padding=1),   # 128 * 16 * 16
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(128, 128, kernel_size=3, padding=1),  # 128 * 16 * 16
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2)           # 128 * 8 * 8
                )
        self.conv_layer3 = nn.Sequential(
                nn.Conv2d(128, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2)           # 256 * 4 * 4
                )
        self.conv_layer4 = nn.Sequential(
                nn.Conv2d(256, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),          # 512 * 2 * 2
                )
        self.fc_layer1 = nn.Sequential(
                nn.Dropout(),
                nn.Linear(512*2*2, 4096),                       # 1 * 4096
                nn.ReLU()
                )
        self.fc_layer2 = nn.Sequential(
                nn.Dropout(),
                nn.Linear(4096, 1024),                          # 1 * 1024
                nn.ReLU()
                )
        self.fc_layer3 = nn.Sequential(
                nn.Linear(1024, num_classes),                   # 1 * num_classes
                )

    def forward(self, x) :
        """Return the class logits, one row per input sample."""
        output = self.conv_layer1(x)
        output = self.conv_layer2(output)
        output = self.conv_layer3(output)
        output = self.conv_layer4(output)
        # Flatten per sample. view(-1, 512*2*2) would silently fold extra
        # spatial positions into the batch dimension for non-32x32 inputs;
        # keeping the batch size fixed makes fc_layer1 raise a shape error.
        output = output.view(output.size(0), -1)
        output = self.fc_layer1(output)
        output = self.fc_layer2(output)
        output = self.fc_layer3(output)
        return output

In [None]:
import torch.nn.init as init

def weight_init(m):
    """Apply Kaiming (He) uniform initialisation to a ``Conv2d`` module's weight.

    Intended for use with ``Module.apply``; modules of any other type are left
    untouched.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.kaiming_uniform_(m.weight.data)

In [None]:
class VGG13_32_2(nn.Module) :
    """VGG-style CNN classifier with a 4096 -> 4096 second FC layer.

    Four convolutional stages (2, 2, 3 and 3 Conv-BatchNorm-ReLU units, each
    stage ending in a 2x2 max-pool) are followed by three fully connected
    layers. The shape comments below assume a 3 x 32 x 32 input, for which the
    convolutional part produces 512 x 2 x 2 features.

    Args:
        num_classes: Size of the final output layer (defaults to 10).
    """

    def __init__(self, num_classes=10) :
        super(VGG13_32_2, self).__init__()

        self.conv_layer1 = nn.Sequential(
                nn.BatchNorm2d(3),                              # Normalize the input
                nn.Conv2d(3, 64, kernel_size=3, padding=1),     # 64 * 32 * 32
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(64, 64, kernel_size=3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),                                      # 64 * 32 * 32
                nn.MaxPool2d(kernel_size=2, stride=2)           # 64 * 16 * 16
                )
        self.conv_layer2 = nn.Sequential(
                nn.Conv2d(64, 128, kernel_size=3, padding=1),   # 128 * 16 * 16
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(128, 128, kernel_size=3, padding=1),  # 128 * 16 * 16
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2)           # 128 * 8 * 8
                )
        self.conv_layer3 = nn.Sequential(
                nn.Conv2d(128, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),  # 256 * 8 * 8
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2)           # 256 * 4 * 4
                )
        self.conv_layer4 = nn.Sequential(
                nn.Conv2d(256, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.Conv2d(512, 512, kernel_size=3, padding=1),  # 512 * 4 * 4
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),          # 512 * 2 * 2
                )
        self.fc_layer1 = nn.Sequential(
                nn.Dropout(),
                nn.Linear(512*2*2, 4096),                       # 1 * 4096
                nn.ReLU()
                )
        self.fc_layer2 = nn.Sequential(
                nn.Dropout(),
                nn.Linear(4096, 4096),                          # 1 * 4096
                nn.ReLU()
                )
        self.fc_layer3 = nn.Sequential(
                nn.Linear(4096, num_classes),                   # 1 * num_classes
                )

    def forward(self, x) :
        """Return the class logits, one row per input sample."""
        output = self.conv_layer1(x)
        output = self.conv_layer2(output)
        output = self.conv_layer3(output)
        output = self.conv_layer4(output)
        # Flatten per sample. view(-1, 512*2*2) would silently fold extra
        # spatial positions into the batch dimension for non-32x32 inputs;
        # keeping the batch size fixed makes fc_layer1 raise a shape error.
        output = output.view(output.size(0), -1)
        output = self.fc_layer1(output)
        output = self.fc_layer2(output)
        output = self.fc_layer3(output)
        return output




In [None]:
# Build the CIFAR-10 training set (downloaded on demand) and keep
# [dataset length, index list, dataset] for the K-fold split.
data_info = get_data_len_index()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar_10data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./cifar_10data/cifar-10-python.tar.gz to ./cifar_10data/


### Change FC 4096 layer  
4096 --> 1024 --> 10  

### He Uniform Initialization

### Batch_size 63 --> 256  
### Weight_decay 5e-4 --> 5e-5

In [None]:
# Experiment 1: VGG13_32 (second FC layer is 4096 -> 1024), moved to the selected device.
model = VGG13_32().to(device)
# Re-initialise every Conv2d weight via weight_init (Kaiming uniform).
model.apply(weight_init)
# loss_function and optimizer are read as globals by train_model / test_model.
loss_function = torch.nn.CrossEntropyLoss()
# NOTE: `optim` is the torch_optimizer package imported at the top, not torch.optim.
optimizer = optim.RAdam(model.parameters(), weight_decay=5e-5)
batch_size = 256

In [None]:
# 5-fold training: at most 250 epochs per fold, early-stopping patience of 20.
# The same model object is carried from fold to fold by train_KFold.
model, train_losses, valid_losses = train_KFold(model, batch_size=batch_size, n_epochs=250, patience=20, data_info=data_info, fold=5)

1 Fold is Training
1th Epoch starting.
Epoch [1] Train Loss: 1.7446 & Validation Loss: 1.6394
Validation loss decreased (inf --> 1.6394).  Saving model ...
2th Epoch starting.
Epoch [2] Train Loss: 2.0726 & Validation Loss: 1.9270
EarlyStopping counter: 1 out of 20
3th Epoch starting.
Epoch [3] Train Loss: 1.6171 & Validation Loss: 1.5772
Validation loss decreased --- Saving model ...
4th Epoch starting.
Epoch [4] Train Loss: 1.3981 & Validation Loss: 1.3879
Validation loss decreased --- Saving model ...
5th Epoch starting.
Epoch [5] Train Loss: 1.2343 & Validation Loss: 1.1792
Validation loss decreased --- Saving model ...
6th Epoch starting.
Epoch [6] Train Loss: 1.0960 & Validation Loss: 1.2180
EarlyStopping counter: 1 out of 20
7th Epoch starting.
Epoch [7] Train Loss: 0.9865 & Validation Loss: 1.0754
Validation loss decreased --- Saving model ...
8th Epoch starting.
Epoch [8] Train Loss: 0.8844 & Validation Loss: 0.8955
Validation loss decreased --- Saving model ...
9th Epoch star

In [None]:
 # CIFAR-10 test split (train=False); only ToTensor is applied, no pad/flip/crop.
 # NOTE(review): this cell's first line carries one stray space of indentation,
 # which presumably only works because IPython dedents cells - TODO confirm and remove.
 test_dataset = datasets.CIFAR10(root='./cifar_10data/',
                                train=False,
                                transform=transforms.ToTensor(),
                                download=True) 

# Loader read as a global by test_model; batch_size is whatever the most
# recently run setup cell assigned.
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          num_workers=0)

Files already downloaded and verified


In [None]:
# Evaluate the trained model on the global test_loader and print loss / accuracy.
test_model(model)

[Test set] Average loss: 0.0025, Accuracy: 7962/10000 (79.62%)



### Base Model
#### batch_size = 128
#### He
#### no weight_decay

In [None]:
# Experiment 2 (base model): VGG13_32_2 (second FC layer is 4096 -> 4096).
# This rebinds the global model / loss_function / optimizer used by train_model.
model = VGG13_32_2().to(device)
# Re-initialise every Conv2d weight via weight_init (Kaiming uniform).
model.apply(weight_init)
loss_function = torch.nn.CrossEntropyLoss()
# No weight_decay argument is passed, so RAdam's own default applies.
optimizer = optim.RAdam(model.parameters())
batch_size = 128

In [None]:
# 5-fold training of the base model: at most 250 epochs per fold, patience 20.
# Reuses the data_info built earlier in the notebook.
model, train_losses, valid_losses = train_KFold(model, batch_size=batch_size, n_epochs=250, patience=20, data_info=data_info, fold=5)

1 Fold is Training
1th Epoch starting.
Epoch [1] Train Loss: 1.6348 & Validation Loss: 1.3335
Validation loss decreased (inf --> 1.3335).  Saving model ...
2th Epoch starting.
Epoch [2] Train Loss: 2.2022 & Validation Loss: 1.9265
EarlyStopping counter: 1 out of 20
3th Epoch starting.
Epoch [3] Train Loss: 1.8511 & Validation Loss: 1.8259
EarlyStopping counter: 2 out of 20
4th Epoch starting.
Epoch [4] Train Loss: 1.6147 & Validation Loss: 1.5371
EarlyStopping counter: 3 out of 20
5th Epoch starting.
Epoch [5] Train Loss: 1.3622 & Validation Loss: 1.2939
Validation loss decreased --- Saving model ...
6th Epoch starting.
Epoch [6] Train Loss: 1.1781 & Validation Loss: 1.0677
Validation loss decreased --- Saving model ...
7th Epoch starting.
Epoch [7] Train Loss: 1.0257 & Validation Loss: 1.0474
Validation loss decreased --- Saving model ...
8th Epoch starting.
Epoch [8] Train Loss: 0.9273 & Validation Loss: 0.9060
Validation loss decreased --- Saving model ...
9th Epoch starting.
Epoch 

In [None]:
# Evaluate the base model on the global test_loader and print loss / accuracy.
test_model(model)

[Test set] Average loss: 0.0030, Accuracy: 8894/10000 (88.94%)

