In [1]:
import numpy as np
import time
np.set_printoptions(precision=1)
# import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
# from modules.cnn_with_spectral_parameterization import CNN_Spectral_Param
# from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool
from modules.image_generator import ImageGenerator

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module
import pytorch_fft.fft.autograd as fft

% matplotlib inline
% load_ext autoreload
% autoreload 2

In [2]:
# Load the CIFAR-10 train/test arrays through the project helper `load_cifar10`.
# NOTE(review): this cell originally stated that, in the interest of training time, only
# 1 of 5 cifar10 batches was used. The call below passes 5 and the recorded output shows
# 50000 training images, so that statement looks stale -- confirm the intended setting.
# The important part of the experiment is to compare the rates of convergence of training accuracy,
# so subsetting the training dataset for both spectral and spatial models shouldn't impact
# the relationship between their train accuracy convergences
xtrain, ytrain, xtest, ytest = load_cifar10(5, channels_last=False)

file already downloaded..
getting batch 1
getting batch 2
getting batch 3
getting batch 4
getting batch 5


In [3]:
# Sanity check: display the shapes of the train/test image and label arrays.
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((50000, 3, 32, 32), (50000,), (10000, 3, 32, 32), (10000,))

## Spectral pooling Layer

In [4]:
def _forward_spectral_pool(images, filter_size):
    assert (torch.ge(filter_size, 3)).all()
    assert images.size()[-1] == images.size()[-2] and images.size()[-1] >= 3
    
    if int(filter_size) % 2 == 1:
        n = int((filter_size - 1)/2)
        top_left = images[:, :, :n+1, :n+1]
        top_right = images[:, :, :n+1, -n:]
        bottom_left = images[:, :, -n:, :n+1]
        bottom_right = images[:, :, -n:, -n:]
        top_combined = torch.cat([top_left, top_right], dim=-1)
        bottom_combined = torch.cat([bottom_left, bottom_right], dim=-1)
        all_together = torch.cat([top_combined, bottom_combined], dim=-2)
    
    else:
        n = int(filter_size / 2)
        top_left = images[:, :, :n, :n]
        top_middle = torch.unsqueeze(0.5**0.5 * (images[:, :, :n, n] + images[:, :, :n, -n]), -1)
        top_right = images[:, :, :n, -(n-1):]
        middle_left = torch.unsqueeze(0.5**0.5 * (images[:, :, n, :n] + images[:, :, -n, :n]), -2)
        middle_middle = torch.unsqueeze(torch.unsqueeze(0.5 * 
                                    (images[:, :, n, n] + images[:, :, n, -n] + images[:, :, -n, n] + images[:, :, -n, -n]), 
                                    -1), -1)
        middle_right = torch.unsqueeze(0.5**0.5 * (images[:, :, n, -(n-1):] + images[:, :, -n, -(n-1):]), -2)
        bottom_left = images[:, :, -(n-1):, :n]
        bottom_middle = torch.unsqueeze(0.5 ** 0.5 * (images[:, :, -(n-1):, n] + images[:, :, -(n-1):, -n]), -1)
        bottom_right = images[:, :, -(n-1):, -(n-1):]
        top_combined = torch.cat([top_left, top_middle, top_right], dim=-1)
        middle_combined = torch.cat([middle_left, middle_middle, middle_right], dim=-1)
        bottom_combined = torch.cat([bottom_left, bottom_middle, bottom_right], dim=-1)
        all_together = torch.cat([top_combined, middle_combined, bottom_combined], dim=-2)
        
    return all_together
    

class SpectralPool(Module):
    """Pooling layer that shrinks feature maps by cropping their 2-D spectrum.

    The input is passed through ``fft.Fft2d`` together with an all-zero
    imaginary part, both returned parts are cropped with
    ``_forward_spectral_pool``, and the real part of ``fft.Ifft2d`` applied to
    the cropped parts is returned.

    Args:
        filter_size: side length handed to ``_forward_spectral_pool``.
    """

    def __init__(self, filter_size):
        super().__init__()
        # Stored as a one-element int tensor; that is the form passed to
        # `_forward_spectral_pool` in `forward`.
        self.filter_size = torch.IntTensor(1).fill_(filter_size)
        self.fft = fft.Fft2d()
        self.ifft = fft.Ifft2d()

    def forward(self, input):
        """Return the real part of the inverse transform of the cropped spectrum."""
        # The imaginary part is created as zeros and moved to CUDA explicitly.
        zero_imag = torch.zeros_like(input).cuda()
        spec_re, spec_im = self.fft(input, zero_imag)

        # Crop real and imaginary parts with the same filter size.
        cropped_re, cropped_im = (
            _forward_spectral_pool(part, self.filter_size)
            for part in (spec_re, spec_im)
        )

        # Only the real part of the inverse transform is used.
        real_out, _ = self.ifft(cropped_re, cropped_im)
        return real_out

## Generic Architecture

In [None]:
class Net(nn.Module):
    """Two conv + spectral-pool stages followed by a three-layer classifier.

    Channel widths are 3 -> 96 -> 192. The second pooling stage emits 8x8
    maps, which are flattened into ``8 * 8 * 192`` features and mapped through
    1024 and 512 hidden units to 10 outputs.

    Args:
        kernel_size: kernel size shared by both convolutions; padding is
            ``(kernel_size - 1) // 2``.
    """

    def __init__(self, kernel_size):
        super().__init__()
        pad = (kernel_size - 1) // 2

        # Stage 1: 3 -> 96 channels, pooled with filter size 16.
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=pad)
        self.pool1 = SpectralPool(filter_size=16)

        # Stage 2: 96 -> 192 channels, pooled with filter size 8.
        self.conv2 = nn.Conv2d(96, 192, kernel_size, padding=pad)
        self.pool2 = SpectralPool(filter_size=8)

        # Classifier head.
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 raw (un-normalised) output scores per sample."""
        for conv, pool in ((self.conv1, self.pool1), (self.conv2, self.pool2)):
            x = pool(F.relu(conv(x)))

        flat = x.view(-1, 8 * 8 * 192)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# Hyper-parameters for the generic architecture run.
kernel_size = 3
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-3
total_epoch = 100

if __name__ == '__main__':
    net = Net(kernel_size).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Multiply the learning rate by 0.8 every 20 scheduler steps (one step per epoch).
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.8)

    # The last 4096 training samples are held out for validation.
    img_gen = ImageGenerator(xtrain[:-4096], ytrain[:-4096])
    val_gen = ImageGenerator(xtrain[-4096:], ytrain[-4096:])

    generator = img_gen.next_batch_gen(batch_size)
    val_generator = val_gen.next_batch_gen(batch_size)

    # Number of batches drawn per epoch; the division truncates, so a trailing
    # partial batch is not requested.
    iters = int((xtrain.shape[0] - 4096) / batch_size)
    val_iters = int(4096 / batch_size)

    for epoch in range(total_epoch):
        start = time.time()  # epoch start timestamp (currently not reported)
        scheduler.step()

        # train
        loss_iter = []
        acc_iter = []
        train_seen = 0
        for itr in range(iters):

            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            optimizer.zero_grad()

            outputs = net.forward(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            _, predict = torch.max(outputs.data, 1)

            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum())
            train_seen += labels.size(0)

        train_loss = np.mean(loss_iter)
        # Divide by the number of samples actually evaluated, not the dataset
        # size: `iters` truncates, so the two can differ.
        train_acc = np.sum(acc_iter) / max(train_seen, 1)

        # validation
        val_iter = []
        val_seen = 0
        for itr in range(val_iters):
            X_batch, y_batch = next(val_generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)

            _, predict = torch.max(outputs.data, 1)

            val_iter.append(predict.eq(labels.data).cpu().sum())
            val_seen += labels.size(0)

        val_acc = np.sum(val_iter) / max(val_seen, 1)

        print('epoch: %d  train loss: %.3f  train acc: %.3f  val acc: %.3f' % (epoch + 1, train_loss, train_acc, val_acc))

    # test the network
    test_gen = ImageGenerator(xtest, ytest)
    generator = test_gen.next_batch_gen(batch_size)
    iters = int(xtest.shape[0] / batch_size)
    test_iter = []
    test_seen = 0
    for itr in range(iters):
        # Draw from the test generator created just above. Pulling from
        # `val_generator` here would report validation accuracy as test accuracy.
        X_batch, y_batch = next(generator)
        inputs = Variable(torch.Tensor(X_batch).cuda())
        labels = Variable(torch.LongTensor(y_batch).cuda())
        outputs = net.forward(inputs)

        _, predict = torch.max(outputs.data, 1)

        test_iter.append(predict.eq(labels.data).cpu().sum())
        test_seen += labels.size(0)

    test_acc = np.sum(test_iter) / max(test_seen, 1)

    print('test acc: %.3f' % (test_acc))

epoch: 1  train loss: 1.680  train acc: 0.395  val acc: 0.505
epoch: 2  train loss: 1.308  train acc: 0.533  val acc: 0.568
epoch: 3  train loss: 1.154  train acc: 0.589  val acc: 0.592
epoch: 4  train loss: 1.048  train acc: 0.625  val acc: 0.628
epoch: 5  train loss: 0.956  train acc: 0.660  val acc: 0.650
epoch: 6  train loss: 0.893  train acc: 0.684  val acc: 0.648
epoch: 7  train loss: 0.821  train acc: 0.710  val acc: 0.659
epoch: 8  train loss: 0.746  train acc: 0.736  val acc: 0.650
epoch: 9  train loss: 0.687  train acc: 0.757  val acc: 0.655
epoch: 10  train loss: 0.627  train acc: 0.779  val acc: 0.667
epoch: 11  train loss: 0.570  train acc: 0.796  val acc: 0.651
epoch: 12  train loss: 0.511  train acc: 0.818  val acc: 0.667
epoch: 13  train loss: 0.468  train acc: 0.834  val acc: 0.663
epoch: 14  train loss: 0.425  train acc: 0.848  val acc: 0.671
epoch: 15  train loss: 0.385  train acc: 0.862  val acc: 0.665
epoch: 16  train loss: 0.358  train acc: 0.871  val acc: 0.664
e

* The internet connection dropped after epoch 59. Because training was very slow, I did not rerun it to complete all 100 epochs.

## Deep Architecture

In [5]:
class Net(nn.Module):
    """Deeper all-convolutional classifier with three spectral-pooling stages.

    Three conv + ReLU + ``SpectralPool`` stages (3 -> 128 -> 160 -> 192
    channels, pooled with filter sizes 16, 8 and 4) are followed by two 1x1
    convolutions (192 -> 192 -> 10) and a 4x4 average pool that collapses each
    of the 10 maps to a single value.

    Args:
        kernel_size: kernel size of the first three convolutions; padding is
            ``(kernel_size - 1) // 2``.
    """

    def __init__(self, kernel_size):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 128, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = SpectralPool(filter_size=16)

        self.conv2 = nn.Conv2d(128, 160, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = SpectralPool(filter_size=8)

        self.conv3 = nn.Conv2d(160, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool3 = SpectralPool(filter_size=4)

        # 1x1 convolutions act as the per-position classifier head.
        self.conv4 = nn.Conv2d(192, 192, kernel_size=1, padding=0)
        self.conv5 = nn.Conv2d(192, 10, kernel_size=1, padding=0)

        self.avg = nn.AvgPool2d(4)

    def forward(self, x):
        """Return one row of 10 raw output scores per input sample."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.conv5(F.relu(self.conv4(x)))

        pooled = self.avg(x)
        # Flatten everything but the batch axis. A bare `torch.squeeze` would
        # also drop the batch axis when the batch holds a single sample.
        return pooled.view(pooled.size(0), -1)

In [6]:
# Hyper-parameters for the deep architecture run.
kernel_size = 3
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-3
total_epoch = 50

if __name__ == '__main__':
    net = Net(kernel_size).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Multiply the learning rate by 0.8 every 20 scheduler steps (one step per epoch).
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.8)

    # The last 4096 training samples are held out for validation.
    img_gen = ImageGenerator(xtrain[:-4096], ytrain[:-4096])
    val_gen = ImageGenerator(xtrain[-4096:], ytrain[-4096:])

    generator = img_gen.next_batch_gen(batch_size)
    val_generator = val_gen.next_batch_gen(batch_size)

    # Number of batches drawn per epoch; the division truncates, so a trailing
    # partial batch is not requested.
    iters = int((xtrain.shape[0] - 4096) / batch_size)
    val_iters = int(4096 / batch_size)

    for epoch in range(total_epoch):
        start = time.time()  # epoch start timestamp (currently not reported)
        scheduler.step()

        # train
        loss_iter = []
        acc_iter = []
        train_seen = 0
        for itr in range(iters):

            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            optimizer.zero_grad()

            outputs = net.forward(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            _, predict = torch.max(outputs.data, 1)

            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum())
            train_seen += labels.size(0)

        train_loss = np.mean(loss_iter)
        # Divide by the number of samples actually evaluated, not the dataset
        # size: `iters` truncates, so the two can differ.
        train_acc = np.sum(acc_iter) / max(train_seen, 1)

        # validation
        val_iter = []
        val_seen = 0
        for itr in range(val_iters):
            X_batch, y_batch = next(val_generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)

            _, predict = torch.max(outputs.data, 1)

            val_iter.append(predict.eq(labels.data).cpu().sum())
            val_seen += labels.size(0)

        val_acc = np.sum(val_iter) / max(val_seen, 1)

        print('epoch: %d  train loss: %.3f  train acc: %.3f  val acc: %.3f' % (epoch + 1, train_loss, train_acc, val_acc))

    # test the network
    test_gen = ImageGenerator(xtest, ytest)
    generator = test_gen.next_batch_gen(batch_size)
    iters = int(xtest.shape[0] / batch_size)
    test_iter = []
    test_seen = 0
    for itr in range(iters):
        # Draw from the test generator created just above. Pulling from
        # `val_generator` here would report validation accuracy as test accuracy.
        X_batch, y_batch = next(generator)
        inputs = Variable(torch.Tensor(X_batch).cuda())
        labels = Variable(torch.LongTensor(y_batch).cuda())
        outputs = net.forward(inputs)

        _, predict = torch.max(outputs.data, 1)

        test_iter.append(predict.eq(labels.data).cpu().sum())
        test_seen += labels.size(0)

    test_acc = np.sum(test_iter) / max(test_seen, 1)

    print('test acc: %.3f' % (test_acc))

epoch: 1  train loss: 1.830  train acc: 0.319  val acc: 0.443
epoch: 2  train loss: 1.452  train acc: 0.470  val acc: 0.501
epoch: 3  train loss: 1.332  train acc: 0.516  val acc: 0.528
epoch: 4  train loss: 1.238  train acc: 0.553  val acc: 0.558
epoch: 5  train loss: 1.167  train acc: 0.581  val acc: 0.595
epoch: 6  train loss: 1.102  train acc: 0.606  val acc: 0.613
epoch: 7  train loss: 1.049  train acc: 0.628  val acc: 0.632
epoch: 8  train loss: 1.014  train acc: 0.641  val acc: 0.644
epoch: 9  train loss: 0.969  train acc: 0.656  val acc: 0.646
epoch: 10  train loss: 0.944  train acc: 0.663  val acc: 0.684
epoch: 11  train loss: 0.911  train acc: 0.678  val acc: 0.662
epoch: 12  train loss: 0.885  train acc: 0.686  val acc: 0.698
epoch: 13  train loss: 0.861  train acc: 0.696  val acc: 0.687
epoch: 14  train loss: 0.828  train acc: 0.707  val acc: 0.685
epoch: 15  train loss: 0.810  train acc: 0.714  val acc: 0.681
epoch: 16  train loss: 0.791  train acc: 0.723  val acc: 0.707
e