In [1]:
import numpy as np
import time
np.set_printoptions(precision=1)
# import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
# from modules.cnn_with_spectral_parameterization import CNN_Spectral_Param
# from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool
from modules.image_generator import ImageGenerator

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module
import pytorch_fft.fft.autograd as fft

% matplotlib inline
% load_ext autoreload
% autoreload 2

In [2]:
# To keep training time manageable, only 1 of the 5 CIFAR-10 training batches is loaded.
# The experiment compares how quickly training accuracy converges for the spectral and
# spatial models; both models see the same subset, so subsetting should not change the
# relationship between their convergence rates.
# channels_last=False yields channel-first arrays (see the shapes printed in the next cell).
xtrain, ytrain, xtest, ytest = load_cifar10(1, channels_last=False)

file already downloaded..
getting batch 1


In [3]:
# Sanity check: display the shapes of the train/test arrays returned by load_cifar10.
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((10000, 3, 32, 32), (10000,), (10000, 3, 32, 32), (10000,))

## Spectral parameterization

In [21]:
class SpectralParam(Module):
    """2-D convolution whose kernel is learned in the frequency domain.

    A spatial kernel is Xavier-initialised, transformed with ``fft.fft2`` and the
    two resulting tensors are stored as the trainable parameters ``weight_re`` and
    ``weight_im``. On every forward pass the kernel is rebuilt with the inverse
    transform and applied with ``F.conv2d``.

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels produced by the convolution.
        kernel_size: side length of the (square) kernel.
        stride, padding, dilation, groups: forwarded unchanged to ``F.conv2d``.
        bias: if True, a trainable bias of size ``out_channels`` is added;
            otherwise the ``bias`` parameter is registered as ``None``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
        super(SpectralParam, self).__init__()
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups

        self.ifft = fft.Ifft2d()

        # F.conv2d expects a weight of shape (out_channels, in_channels / groups, kH, kW),
        # so the per-group channel count is used here; identical to in_channels when groups == 1.
        # The tensors are created on the GPU before the transform
        # (presumably because pytorch_fft only handles CUDA tensors -- TODO confirm).
        weight = torch.Tensor(out_channels, in_channels // groups, kernel_size, kernel_size).cuda()
        nn.init.xavier_uniform(weight)
        # Transform the spatial kernel, passing zeros as its imaginary part.
        weight_re, weight_im = fft.fft2(weight, torch.zeros_like(weight).cuda())

        self.weight_re = nn.Parameter(weight_re, requires_grad=True)
        self.weight_im = nn.Parameter(weight_im, requires_grad=True)

        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels), requires_grad=True)
            nn.init.normal(self.bias)
        else:
            # Register the missing bias only through register_parameter: assigning
            # ``self.bias = None`` first creates a plain attribute of the same name,
            # which register_parameter rejects with a KeyError on PyTorch versions
            # that check for existing attributes.
            self.register_parameter('bias', None)

    def forward(self, input):
        """Rebuild the spatial kernel from its spectral parameters and convolve ``input`` with it."""
        # Only the first output of the inverse transform is used as the kernel;
        # the second output is discarded.
        weight, _ = self.ifft(self.weight_re, self.weight_im)
        result = F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)

        return result

### Test the layer

In [16]:
# Smoke test for SpectralParam: run one forward and one backward pass on a tiny input.
# A is a 1x1x4x4 input holding the values 0..15; requires_grad=True so A.grad gets populated.
A = Variable(torch.arange(16).view(1, 1, 4, 4).cuda(), requires_grad=True)
# 1 input channel, 3 output channels, 2x2 kernel.
model = SpectralParam(1, 3, 2).cuda()

B = model(A)
print(B)
# Reduce the output to a scalar (sum of squares) so that backward() can be called on it.
C = torch.sum(B * B)
C.backward()
# Gradients reaching both the input and the real spectral weights show the layer is differentiable.
print(A.grad, model.weight_re.grad)

Variable containing:
(0 ,0 ,.,.) = 
  -2.4363  -2.9699  -3.5035
  -4.5707  -5.1043  -5.6379
  -6.7050  -7.2386  -7.7722

(0 ,1 ,.,.) = 
  -1.9191  -2.8061  -3.6930
  -5.4670  -6.3539  -7.2409
  -9.0149  -9.9018 -10.7888

(0 ,2 ,.,.) = 
  -0.5217   0.0315   0.5847
   1.6911   2.2443   2.7975
   3.9039   4.4571   5.0103
[torch.cuda.FloatTensor of size 1x3x3x3 (GPU 0)]

Variable containing:
(0 ,0 ,.,.) = 
   1.5356   1.8990   3.2706  -0.8773
   9.3115  13.5115  16.2664   1.9149
  19.7047  24.5312  27.2862   2.5415
  12.6273  17.1937  18.5771   3.5221
[torch.cuda.FloatTensor of size 1x1x4x4 (GPU 0)]
 Variable containing:
(0 ,0 ,.,.) = 
 -7.9793e+02  4.5938e+01
  1.8375e+02 -1.5259e-05

(1 ,0 ,.,.) = 
 -1.0387e+03  5.7186e+01
  2.2874e+02  1.5259e-05

(2 ,0 ,.,.) = 
  4.1583e+02 -2.0198e+01
 -8.0794e+01  7.6294e-06
[torch.cuda.FloatTensor of size 3x1x2x2 (GPU 0)]



## Generic Architecture

In [17]:
class Net(nn.Module):
    """Generic architecture: two spectral conv + max-pool stages, then three linear layers.

    Each convolution is padded with ``(kernel_size - 1) // 2`` and each pooling stage
    uses a 3x3 window with stride 2 and padding 1. The flatten step assumes an
    8x8 feature map with 192 channels, which is what 32x32 inputs produce when
    ``kernel_size`` is odd. The final layer returns 10 class scores per sample.
    """

    def __init__(self, kernel_size):
        super(Net, self).__init__()
        same_pad = (kernel_size - 1) // 2

        # Stage 1: 3 -> 96 channels, then downsample.
        self.conv1 = SpectralParam(3, 96, kernel_size, padding=same_pad)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 2: 96 -> 192 channels, then downsample.
        self.conv2 = SpectralParam(96, 192, kernel_size, padding=same_pad)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Classifier head.
        self.fc1 = nn.Linear(8 * 8 * 192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        stages = ((self.conv1, self.pool1), (self.conv2, self.pool2))
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        flat = x.view(-1, 8 * 8 * 192)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [22]:
# Hyper-parameters for the generic architecture.
kernel_size = 3
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-3
total_epoch = 100
# Number of trailing training samples held out for validation.
val_size = 4096

if __name__ == '__main__':
    net = Net(kernel_size).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Multiply the learning rate by 0.8 every 20 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.8)

    # Train on everything except the last val_size samples, validate on those.
    img_gen = ImageGenerator(xtrain[:-val_size], ytrain[:-val_size])
    val_gen = ImageGenerator(xtrain[-val_size:], ytrain[-val_size:])

    generator = img_gen.next_batch_gen(batch_size)
    val_generator = val_gen.next_batch_gen(batch_size)

    # Only whole batches are drawn per epoch.
    iters = (xtrain.shape[0] - val_size) // batch_size
    val_iters = val_size // batch_size

    for epoch in range(total_epoch):
        scheduler.step()

        # train
        loss_iter = []
        acc_iter = []
        train_seen = 0
        for itr in range(iters):

            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            optimizer.zero_grad()

            outputs = net.forward(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            _, predict = torch.max(outputs.data, 1)

            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum())
            train_seen += labels.size(0)

        train_loss = np.mean(loss_iter)
        # Normalise by the number of samples actually evaluated: the dataset size is
        # not a multiple of batch_size, so dividing by it would understate accuracy.
        train_acc = np.sum(acc_iter) / max(train_seen, 1)

        # validation
        val_iter = []
        val_seen = 0
        for itr in range(val_iters):
            X_batch, y_batch = next(val_generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)

            _, predict = torch.max(outputs.data, 1)

            val_iter.append(predict.eq(labels.data).cpu().sum())
            val_seen += labels.size(0)

        val_acc = np.sum(val_iter) / max(val_seen, 1)

        print('epoch: %d  train loss: %.3f  train acc: %.3f  val acc: %.3f' % (epoch + 1, train_loss, train_acc, val_acc))

    # test the network
    test_gen = ImageGenerator(xtest, ytest)
    test_generator = test_gen.next_batch_gen(batch_size)
    test_iters = xtest.shape[0] // batch_size
    test_iter = []
    test_seen = 0
    for itr in range(test_iters):
        # Batches must come from the test generator, not the validation one,
        # for the reported number to be a test accuracy.
        X_batch, y_batch = next(test_generator)
        inputs = Variable(torch.Tensor(X_batch).cuda())
        labels = Variable(torch.LongTensor(y_batch).cuda())
        outputs = net.forward(inputs)

        _, predict = torch.max(outputs.data, 1)

        test_iter.append(predict.eq(labels.data).cpu().sum())
        test_seen += labels.size(0)

    test_acc = np.sum(test_iter) / max(test_seen, 1)

    print('test acc: %.3f' % (test_acc))

epoch: 1  train loss: 2.486  train acc: 0.096  val acc: 0.097
epoch: 2  train loss: 2.307  train acc: 0.107  val acc: 0.104
epoch: 3  train loss: 2.255  train acc: 0.151  val acc: 0.143
epoch: 4  train loss: 2.171  train acc: 0.178  val acc: 0.192
epoch: 5  train loss: 2.065  train acc: 0.221  val acc: 0.216
epoch: 6  train loss: 1.929  train acc: 0.271  val acc: 0.283
epoch: 7  train loss: 1.805  train acc: 0.318  val acc: 0.328
epoch: 8  train loss: 1.719  train acc: 0.355  val acc: 0.327
epoch: 9  train loss: 1.672  train acc: 0.376  val acc: 0.375
epoch: 10  train loss: 1.601  train acc: 0.409  val acc: 0.408
epoch: 11  train loss: 1.567  train acc: 0.424  val acc: 0.422
epoch: 12  train loss: 1.506  train acc: 0.444  val acc: 0.432
epoch: 13  train loss: 1.452  train acc: 0.465  val acc: 0.421
epoch: 14  train loss: 1.416  train acc: 0.484  val acc: 0.475
epoch: 15  train loss: 1.362  train acc: 0.504  val acc: 0.487
epoch: 16  train loss: 1.311  train acc: 0.519  val acc: 0.487
e

## Deep Architecture

In [23]:
class Net(nn.Module):
    """Deep architecture: three spectral conv + max-pool stages, two 1x1 spectral
    convolutions and a global average pool.

    Each of the first three convolutions is padded with ``(kernel_size - 1) // 2``
    and followed by a 3x3 max-pool with stride 2 and padding 1. The last 1x1
    convolution produces 10 channels, which ``AvgPool2d(4)`` reduces spatially;
    for 32x32 inputs and an odd ``kernel_size`` the pooled map is 1x1, so the
    output has shape ``(batch, 10)``.
    """

    def __init__(self, kernel_size):
        super(Net, self).__init__()
        self.conv1 = SpectralParam(3, 128, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.conv2 = SpectralParam(128, 160, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.conv3 = SpectralParam(160, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # 1x1 convolutions acting as the classifier head.
        self.conv4 = SpectralParam(192, 192, kernel_size=1, padding=0)
        self.conv5 = SpectralParam(192, 10, kernel_size=1, padding=0)

        self.avg = nn.AvgPool2d(4)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self.conv5(F.relu(self.conv4(x)))

        pooled = self.avg(x)
        # Squeeze only the two spatial axes. A dimension-less squeeze would also
        # drop the batch axis when the batch holds a single sample, returning
        # shape (10,) instead of (1, 10).
        return pooled.squeeze(3).squeeze(2)

In [25]:
# Hyper-parameters for the deep architecture.
kernel_size = 3
batch_size = 128
learning_rate = 5e-3
weight_decay = 1e-3
total_epoch = 100
# Number of trailing training samples held out for validation.
val_size = 4096

if __name__ == '__main__':
    net = Net(kernel_size).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Multiply the learning rate by 0.8 every 20 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.8)

    # Train on everything except the last val_size samples, validate on those.
    img_gen = ImageGenerator(xtrain[:-val_size], ytrain[:-val_size])
    val_gen = ImageGenerator(xtrain[-val_size:], ytrain[-val_size:])

    generator = img_gen.next_batch_gen(batch_size)
    val_generator = val_gen.next_batch_gen(batch_size)

    # Only whole batches are drawn per epoch.
    iters = (xtrain.shape[0] - val_size) // batch_size
    val_iters = val_size // batch_size

    for epoch in range(total_epoch):
        scheduler.step()

        # train
        loss_iter = []
        acc_iter = []
        train_seen = 0
        for itr in range(iters):

            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            optimizer.zero_grad()

            outputs = net.forward(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            _, predict = torch.max(outputs.data, 1)

            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum())
            train_seen += labels.size(0)

        train_loss = np.mean(loss_iter)
        # Normalise by the number of samples actually evaluated: the dataset size is
        # not a multiple of batch_size, so dividing by it would understate accuracy.
        train_acc = np.sum(acc_iter) / max(train_seen, 1)

        # validation
        val_iter = []
        val_seen = 0
        for itr in range(val_iters):
            X_batch, y_batch = next(val_generator)
            inputs = Variable(torch.Tensor(X_batch).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)

            _, predict = torch.max(outputs.data, 1)

            val_iter.append(predict.eq(labels.data).cpu().sum())
            val_seen += labels.size(0)

        val_acc = np.sum(val_iter) / max(val_seen, 1)

        print('epoch: %d  train loss: %.3f  train acc: %.3f  val acc: %.3f' % (epoch + 1, train_loss, train_acc, val_acc))

    # test the network
    test_gen = ImageGenerator(xtest, ytest)
    test_generator = test_gen.next_batch_gen(batch_size)
    test_iters = xtest.shape[0] // batch_size
    test_iter = []
    test_seen = 0
    for itr in range(test_iters):
        # Batches must come from the test generator, not the validation one,
        # for the reported number to be a test accuracy.
        X_batch, y_batch = next(test_generator)
        inputs = Variable(torch.Tensor(X_batch).cuda())
        labels = Variable(torch.LongTensor(y_batch).cuda())
        outputs = net.forward(inputs)

        _, predict = torch.max(outputs.data, 1)

        test_iter.append(predict.eq(labels.data).cpu().sum())
        test_seen += labels.size(0)

    test_acc = np.sum(test_iter) / max(test_seen, 1)

    print('test acc: %.3f' % (test_acc))

epoch: 1  train loss: 2.406  train acc: 0.098  val acc: 0.103
epoch: 2  train loss: 2.296  train acc: 0.121  val acc: 0.165
epoch: 3  train loss: 2.147  train acc: 0.186  val acc: 0.182
epoch: 4  train loss: 2.020  train acc: 0.226  val acc: 0.231
epoch: 5  train loss: 1.914  train acc: 0.275  val acc: 0.306
epoch: 6  train loss: 1.828  train acc: 0.314  val acc: 0.334
epoch: 7  train loss: 1.759  train acc: 0.338  val acc: 0.340
epoch: 8  train loss: 1.730  train acc: 0.359  val acc: 0.375
epoch: 9  train loss: 1.647  train acc: 0.386  val acc: 0.381
epoch: 10  train loss: 1.632  train acc: 0.401  val acc: 0.395
epoch: 11  train loss: 1.588  train acc: 0.409  val acc: 0.374
epoch: 12  train loss: 1.573  train acc: 0.423  val acc: 0.408
epoch: 13  train loss: 1.517  train acc: 0.440  val acc: 0.440
epoch: 14  train loss: 1.512  train acc: 0.451  val acc: 0.412
epoch: 15  train loss: 1.481  train acc: 0.457  val acc: 0.423
epoch: 16  train loss: 1.435  train acc: 0.472  val acc: 0.465
e