In [1]:
import torch
import torchvision
from torchvision import transforms, datasets
from torchvision.transforms import Normalize, ToTensor
import torch.nn as nn  # neural network
import torch.optim as optim  # optimization layer
import torch.nn.functional as F  # activation functions
import matplotlib.pyplot as plt
import argparse

In [2]:
train_set = datasets.EMNIST(root="data", split="balanced",
                        train=True, download=True,
                        transform=transforms.Compose([ToTensor()])
                           )

test_set = datasets.EMNIST(root="data", split="balanced", 
                       train=False, download=True, 
                       transform=transforms.Compose([ToTensor()])
                          )

entire_trainset = torch.utils.data.DataLoader(train_set, shuffle=True)
split_train_size = int(0.8*(len(entire_trainset)))
split_valid_size = len(entire_trainset) - split_train_size

train_set, val_set = torch.utils.data.random_split(entire_trainset, [split_train_size, split_valid_size])

print(f'train set size: {split_train_size}, validation set size: {split_valid_size}')

train set size: 90240, validation set size: 22560


In [3]:
parser = argparse.ArgumentParser()
args = parser.parse_args("")

#### Model Capacity ####
args.model_code = 'model_1'
args.in_channels = 1
args.in_dim = 1  # input to a fc layer from the last conv layer
args.out_dim = 62
args.act = 'relu'

#### Regularization ####
args.dropout = 0.2
args.use_bn = True
# args.use_xavier = True

#### Optimization ####
args.optim = 'adam'
args.lr = 0.001  # learning rate
args.epoch = 10
args.train_batch = 256
args.test_batch = 256

print(args)

Namespace(act='relu', dropout=0.2, epoch=10, in_channels=1, in_dim=1, lr=0.001, model_code='model_1', optim='adam', out_dim=62, test_batch=256, train_batch=256, use_bn=True)


In [4]:
model_codes = {
    'model_1': [16, 64, 'M', 128, 128],
    'model_2': [16, 64, 'M', 128, 128, 'M', 256, 256],
    'model_3': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512]
}

In [5]:
# simple model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 3 convolutional layers
        self.cv1 = nn.Conv2d(in_channel=1,out_channels=16,kernel_size=5, stride=1)  # input: 1 if grayscale, 3 if RGB
        self.cv2 = nn.Conv2d(16, 64, 5)
        self.cv3 = nn.Conv2d(64, 128, 5)
        self.dropout1 = nn.Dropout(0.2)
        
        # Dense layer - (fully connected)
        self.fc1 = nn.Linear(in_features=128*3*3, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.out = nn.Linear(in_features=128, out_features=47)
        
    def forward(self, x):
        '''
        forward method explicitly defines the network's transformation.
        forward method maps an input tensor to a prediction output tensor
        '''
        # hidden convolutional layers
        x = F.relu(self.cv1(x))
        x = F.relu(self.cv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.cv3(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        
        x = self.dropout1(x)
        
        # hidden linear layers
        x = torch.flatten(x, 1)
        #x = x.view(-1, 128*3*3)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        # output layer
        x = self.out(x)
        x = F.softmax(x, dim=1)
        
        return x

How to find the initial input size of dense layer
window size (kernel)^2 x output channel of the last channel (128)

To find window of the last convolutional layer
(input size (28) - kernel size - 2*padding)/stride + 1
* first layer: (28-5-0)/1 + 1 = 24
* second layer: (24-5-0)/1 + 1 = 20 -> after maxpooing -> 10
* third layer: (10-5-0)/1 + 1 = 6 -> after maxpooling -> 3

In [6]:
# easily tunable model
class CNN(nn.Module):
    def __init__(self, model_code, in_channels, out_dim, act, use_bn, dropout):
        super(CNN, self).__init__()
        
        if act == 'relu':
            self.act = nn.ReLU()
        elif act == 'sigmoid':
            self.act = nn.Sigmoid()
        elif act == 'leakyrelu':
            self.act = nn.LeakyReLU()
        else:
            raise ValueError("Not a valid activation function")
            
        
        self.layers = self.make_layers(model_code, in_channels, use_bn)
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Sequential(nn.Linear(12544, 256), self.act,nn.Linear(256, out_dim))

    def forward(self, x):
        x = self.layers(x)
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        # skipped softmax to use cross entropy loss
        return x
    
    def make_layers(self, model_code, in_channels, use_bn):
        layers = []
        for x in model_codes[model_code]:
            if x == "M":
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels=in_channels,
                                    out_channels=x,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)]
                if use_bn:
                    layers += [nn.BatchNorm2d(x)]
                layers += [self.act]
                in_channels = x
        return nn.Sequential(*layers)

# Train, Validate, Test

In [7]:
net = CNN(args.model_code, args.in_channels, args.out_dim, args.act, args.use_bn, args.dropout)
optimizer = optim.Adam(net.parameters(), lr=args.lr) # learning rate
loss_function = nn.CrossEntropyLoss()

In [8]:
def train(net, optimizer, criterion, args):
    # load train set as some other object that can help on iterating over data 
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=args.train_batch, shuffle=True)
    
    net.train()
    
    correct = 0
    total = 0
    train_loss = 0
    
    for i, data in enumerate(train_loader):
        inputs, labels = data
        outputs = net(inputs)
        
        optimizer.zero_grad()
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        # the class with the highest value is the prediction
        _, prediction = torch.max(outputs.data, 1)  # grab prediction as one-dimensional tensor
        total += labels.size(0)
        correct += (prediction == labels).sum().item()

    train_loss = train_loss / len(train_loader)
    train_acc = 100 * correct / total
    
    return net, train_loss, train_acc

In [9]:
def validate(net, criterion, args):
    # load validation set as some other object that can help on iterating over data 
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=args.test_batch, shuffle=True)
    
    net.eval()

    correct = 0
    total = 0
    val_loss = 0 
    
    with torch.no_grad():
        for data in valloader:
            inputs, labels = data
            outputs = net(images)

            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, prediction = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()

        val_loss = val_loss / len(val_loader)
        val_acc = 100 * correct / total

    return val_loss, val_acc

In [10]:
def test(net):
    # load test as some other object that can help on iterating over data 
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=args.test_batch, shuffle=True)
    pass