In [9]:
import numpy as np
np.set_printoptions(precision=1)
# import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
# from modules.cnn_with_spectral_parameterization import CNN_Spectral_Param
# from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool
from modules.image_generator import ImageGenerator

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module

% matplotlib inline
% load_ext autoreload
% autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# In the interest of training time, we only used 1 of 5 cifar10 batches
# The important part of the experiment is to compare the rates of convergence of training accuracy,
# so subsetting the training dataset for both spectral and spatial models shouldn't impact
# the relationship between their train accuracy convergences
xtrain, ytrain, xtest, ytest = load_cifar10(1, channels_last=True)

file already downloaded..
getting batch 1


In [3]:
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((10000, 32, 32, 3), (10000,), (10000, 32, 32, 3), (10000,))

# 1. Rewrite tensorflow model to pytorch

## model in Pytorch 

In [4]:
class generic_Net(nn.Module):
    def __init__(self, kernel_size):
        super(generic_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 8 * 8 * 192)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    
class deep_Net(nn.Module):
    def __init__(self, kernel_size):
        super(deep_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.conv2 = nn.Conv2d(96, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pad1 = nn.ZeroPad2d((0, 1, 0, 1))
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        self.conv3 = nn.Conv2d(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.conv4 = nn.Conv2d(192, 192, kernel_size, padding=(kernel_size-1)//2)
        self.conv5 = nn.Conv2d(192, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pad2 = nn.ZeroPad2d((0, 1, 0, 1))
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        self.conv6 = nn.Conv2d(192, 192, kernel_size, padding=(kernel_size-1)//2)
        self.conv7 = nn.Conv2d(192, 10, kernel_size, padding=(kernel_size-1)//2)
    
    def forward(self, x):
        x = F.relu(self.conv2(F.relu(self.conv1(x))))
        x = self.pool1(self.pad1(x))
        x = F.relu(self.conv5(F.relu(self.conv4(F.relu(self.conv3(x))))))
        x = self.pool2(self.pad2(x))
        x = self.conv7(F.relu(self.conv6(x)))
        for ax in [-2, -1]:
            x = torch.mean(x, ax)
        return x

## Run models with pytorch

In [6]:
kernel_size = 3
batch_size = 200
learning_rate = 1e-5
l2norm = 0.01
total_epoch = 100

if __name__ == '__main__':
    net = generic_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
#     img_gen.translate(shift_height=-2, shift_width=0)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
#         if epc % 4 == 0 or epc % 4 == 1:
#             img_gen.translate(shift_height=2, shift_width=0)
#         elif epc % 4 == 2 or epc % 4 == 3:
#             img_gen.translate(shift_height=-2, shift_width=0)
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1: # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.750  acc: 0.140 
epoch: 2  loss: 2.657  acc: 0.203 
epoch: 3  loss: 2.711  acc: 0.175 
epoch: 4  loss: 2.756  acc: 0.149 
epoch: 5  loss: 2.685  acc: 0.141 
epoch: 6  loss: 2.651  acc: 0.179 
epoch: 7  loss: 2.643  acc: 0.182 
epoch: 8  loss: 2.597  acc: 0.169 
epoch: 9  loss: 2.520  acc: 0.192 
epoch: 10  loss: 2.450  acc: 0.239 
epoch: 11  loss: 2.503  acc: 0.238 
epoch: 12  loss: 2.675  acc: 0.198 
epoch: 13  loss: 2.553  acc: 0.236 
epoch: 14  loss: 2.585  acc: 0.220 
epoch: 15  loss: 2.618  acc: 0.214 
epoch: 16  loss: 2.560  acc: 0.222 
epoch: 17  loss: 2.509  acc: 0.212 
epoch: 18  loss: 2.478  acc: 0.226 
epoch: 19  loss: 2.478  acc: 0.247 
epoch: 20  loss: 2.626  acc: 0.222 
epoch: 21  loss: 2.585  acc: 0.249 
epoch: 22  loss: 2.678  acc: 0.238 
epoch: 23  loss: 2.543  acc: 0.229 
epoch: 24  loss: 2.676  acc: 0.187 
epoch: 25  loss: 2.657  acc: 0.189 
epoch: 26  loss: 2.619  acc: 0.232 
epoch: 27  loss: 2.632  acc: 0.222 
epoch: 28  loss: 2.650  acc: 0.196 
e

In [7]:
kernel_size = 3
batch_size = 200
learning_rate = 5e-6
l2norm = 0.01
total_epoch = 100

if __name__ == '__main__':
    net = deep_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
#     img_gen.translate(shift_height=-2, shift_width=0)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
#         if epc % 4 == 0 or epc % 4 == 1:
#             img_gen.translate(shift_height=2, shift_width=0)
#         elif epc % 4 == 2 or epc % 4 == 3:
#             img_gen.translate(shift_height=-2, shift_width=0)
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1:  # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.751  acc: 0.098 
epoch: 2  loss: 2.743  acc: 0.098 
epoch: 3  loss: 2.735  acc: 0.098 
epoch: 4  loss: 2.726  acc: 0.113 
epoch: 5  loss: 2.716  acc: 0.104 
epoch: 6  loss: 2.706  acc: 0.107 
epoch: 7  loss: 2.694  acc: 0.117 
epoch: 8  loss: 2.675  acc: 0.131 
epoch: 9  loss: 2.638  acc: 0.157 
epoch: 10  loss: 2.580  acc: 0.171 
epoch: 11  loss: 2.707  acc: 0.145 
epoch: 12  loss: 2.805  acc: 0.103 
epoch: 13  loss: 2.668  acc: 0.105 
epoch: 14  loss: 2.655  acc: 0.102 
epoch: 15  loss: 2.650  acc: 0.099 
epoch: 16  loss: 2.644  acc: 0.098 
epoch: 17  loss: 2.637  acc: 0.098 
epoch: 18  loss: 2.624  acc: 0.098 
epoch: 19  loss: 2.598  acc: 0.098 
epoch: 20  loss: 2.609  acc: 0.098 
epoch: 21  loss: 2.575  acc: 0.098 
epoch: 22  loss: 2.568  acc: 0.127 
epoch: 23  loss: 2.563  acc: 0.142 
epoch: 24  loss: 2.528  acc: 0.146 
epoch: 25  loss: 2.538  acc: 0.131 
epoch: 26  loss: 2.525  acc: 0.141 
epoch: 27  loss: 2.536  acc: 0.143 
epoch: 28  loss: 2.559  acc: 0.135 
e

# 2. Spectral pooling with CPU
## Spectral Pooling Layer with Numpy
### Should be very slow

In [3]:
def _forward_spectral_pool(images, pool_stride):
    assert pool_stride >= 3
    assert images.shape[-1] == images.shape[-2] and images.shape[-1] >= 3
    
    n = (images.shape[-1] - 1) // pool_stride + 1
    
    top_left = images[:, :, :n, :n]
    top_right = images[:, :, :n, -n:]
    bottom_left = images[:, :, -n:, :n]
    bottom_right = images[:, :, -n:, -n:]
    
    top_combined = np.concatenate([top_left, top_right], axis=-1)
    bottom_combined = np.concatenate([bottom_left, bottom_right], axis=-1)
    all_together = np.concatenate([top_combined, bottom_combined], axis=-2)
    
    return all_together

def _backward_spectral_pool(grad, origin_shape):
    m = grad.shape[-1]
    assert m%2 == 0
    
    pad1 = np.zeros((grad.shape[0], grad.shape[1], m//2, origin_shape-m))
    pad2 = np.zeros((grad.shape[0], grad.shape[1], origin_shape-m, m//2))
    pad3 = np.zeros((grad.shape[0], grad.shape[1], origin_shape-m, origin_shape-m))
    
    top_left = grad[:, :, :m//2, :m//2]
    top_right = grad[:, :, :m//2, -m//2:]
    bottom_left = grad[:, :, -m//2:, :m//2]
    bottom_right = grad[:, :, -m//2:, -m//2:]
    
    top_combined = np.concatenate([top_left, pad1, top_right], axis=-1)
    pad_combined = np.concatenate([pad2, pad3, pad2], axis=-1)
    bottom_combined = np.concatenate([bottom_left, pad1, bottom_right], axis=-1)
    all_together = np.concatenate([top_combined, pad_combined, bottom_combined], axis=-2)
    
    return all_together
    

class Poolfunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, pool_stride):
        ctx.save_for_backward(input)
        im_fft = np.fft.fft2(input.cpu().numpy())
        im_transformed = _forward_spectral_pool(im_fft, pool_stride.numpy()[0])
        im_out = np.real(np.fft.ifft2(im_transformed))
        return torch.FloatTensor(im_out).cuda()
        
    @staticmethod
    def backward(ctx, grad_output):
        input = ctx.saved_tensors[0]
        
        grad_fft = np.fft.fft2(grad_output.data.cpu().numpy())
        grad_transformed = _backward_spectral_pool(grad_fft, input.cpu().numpy().shape[-1])
        grad_out = np.real(np.fft.ifft2(grad_transformed))
        return Variable(torch.FloatTensor(grad_out).cuda(), volatile=True), None

class SpectralPool(Module):
    def __init__(self, pool_stride):
        super(SpectralPool, self).__init__()
        self.pool_stride = torch.IntTensor(1).fill_(pool_stride)
        
    def forward(self, input):
        return Poolfunction.apply(input, self.pool_stride)

### verify the spectral pooling layer

In [4]:
# test Spectral Pooling module
input = Variable(torch.randn(1, 1, 8, 8).cuda(), requires_grad=True)
module = SpectralPool(pool_stride=4).cuda()
result = module(input)
print(result.data)
result.backward(torch.randn(result.size()).cuda())
print(input.grad)


(0 ,0 ,.,.) = 
 -2.5425  0.8660  1.2152  1.2327
  3.6477 -2.2184 -1.2238 -3.3269
 -0.2555  3.1346 -3.7369 -1.6330
  0.3812 -2.9214 -0.7644  1.8675
[torch.cuda.FloatTensor of size 1x1x4x4 (GPU 0)]

Variable containing:
(0 ,0 ,.,.) = 
 -0.0403  0.0379 -0.0379  0.1785 -0.2391  0.0461  0.1493 -0.0945
  0.0114 -0.0549  0.1233 -0.2526  0.2788 -0.1107 -0.0652  0.0698
 -0.0177  0.0607 -0.1452  0.2249 -0.2500  0.2026 -0.1137  0.0384
 -0.2204  0.2215 -0.0523 -0.1505  0.3179 -0.3897  0.2739 -0.0005
  0.2875 -0.3652  0.2104  0.0462 -0.2943  0.4285 -0.3303  0.0172
  0.1316  0.0126 -0.0935  0.0656  0.0446 -0.1746  0.2584 -0.2447
 -0.5155  0.3683 -0.0870 -0.0928  0.1365 -0.0894 -0.0917  0.3716
  0.3634 -0.2810  0.0821 -0.0194  0.0056  0.0872 -0.0808 -0.1572
[torch.cuda.FloatTensor of size 1x1x8x8 (GPU 0)]



## Implement Numpy Spectral Pool with simple model: generic

In [5]:
class generic_Net(nn.Module):
    def __init__(self, kernel_size):
        super(generic_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = SpectralPool(pool_stride=4)
        
        self.conv2 = nn.Conv2d(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = SpectralPool(pool_stride=4)
        
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = x.view(-1, 8 * 8 * 192)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [7]:
# Really slow

kernel_size = 3
batch_size = 200
learning_rate = 1e-5
l2norm = 0.01
total_epoch = 10

if __name__ == '__main__':
    net = generic_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1:  # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.764  acc: 0.094 
epoch: 2  loss: 2.736  acc: 0.134 
epoch: 3  loss: 2.705  acc: 0.128 
epoch: 4  loss: 2.675  acc: 0.124 
epoch: 5  loss: 2.645  acc: 0.127 
epoch: 6  loss: 2.618  acc: 0.131 
epoch: 7  loss: 2.591  acc: 0.131 
epoch: 8  loss: 2.568  acc: 0.146 
epoch: 9  loss: 2.546  acc: 0.150 
epoch: 10  loss: 2.528  acc: 0.156 


# 3. Spectral pooling with GPU
## Spectral Pooling Layer with Pytorch-fft

In [14]:
import pytorch_fft.fft.autograd as fft

def _forward_spectral_pool(images, pool_stride):
    assert (torch.gt(pool_stride, 3)).all()
    assert images.size()[-1] == images.size()[-2] and images.size()[-1] >= 3
    
    n = int((images.size()[-1] - 1) / pool_stride + 1)
    
    top_left = images[:, :, :n, :n]
    top_right = images[:, :, :n, -n:]
    bottom_left = images[:, :, -n:, :n]
    bottom_right = images[:, :, -n:, -n:]
    
    top_combined = torch.cat([top_left, top_right], dim=-1)
    bottom_combined = torch.cat([bottom_left, bottom_right], dim=-1)
    all_together = torch.cat([top_combined, bottom_combined], dim=-2)
    
    return all_together
    

class SpectralPool(Module):
    def __init__(self, pool_stride):
        super(SpectralPool, self).__init__()
        self.pool_stride = torch.IntTensor(1).fill_(pool_stride)
        self.fft = fft.Fft2d()
        self.ifft = fft.Ifft2d()
        
    def forward(self, input):
        in_re, in_im = self.fft(input, torch.zeros_like(input).cuda())
        trans_re = _forward_spectral_pool(in_re, self.pool_stride)
        trans_im = _forward_spectral_pool(in_im, self.pool_stride)
        out_re, _ = self.ifft(trans_re, trans_im)
        
        return out_re

## Implement Pytorch-fft Spectral Pool with simple model: generic

In [15]:
class generic_Net(nn.Module):
    def __init__(self, kernel_size):
        super(generic_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = SpectralPool(pool_stride=4)
        
        self.conv2 = nn.Conv2d(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = SpectralPool(pool_stride=4)
        
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = x.view(-1, 8 * 8 * 192)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [16]:
kernel_size = 3
batch_size = 200
learning_rate = 1e-5
l2norm = 0.01
total_epoch = 100

if __name__ == '__main__':
    net = generic_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1:  # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.679  acc: 0.169 
epoch: 2  loss: 2.684  acc: 0.207 
epoch: 3  loss: 2.609  acc: 0.215 
epoch: 4  loss: 2.584  acc: 0.199 
epoch: 5  loss: 2.519  acc: 0.235 
epoch: 6  loss: 2.423  acc: 0.283 
epoch: 7  loss: 2.428  acc: 0.294 
epoch: 8  loss: 2.436  acc: 0.280 
epoch: 9  loss: 2.408  acc: 0.272 
epoch: 10  loss: 2.465  acc: 0.249 
epoch: 11  loss: 2.325  acc: 0.308 
epoch: 12  loss: 2.295  acc: 0.343 
epoch: 13  loss: 2.266  acc: 0.350 
epoch: 14  loss: 2.349  acc: 0.330 
epoch: 15  loss: 2.271  acc: 0.340 
epoch: 16  loss: 2.323  acc: 0.321 
epoch: 17  loss: 2.264  acc: 0.352 
epoch: 18  loss: 2.276  acc: 0.344 
epoch: 19  loss: 2.287  acc: 0.349 
epoch: 20  loss: 2.244  acc: 0.376 
epoch: 21  loss: 2.197  acc: 0.398 
epoch: 22  loss: 2.183  acc: 0.414 
epoch: 23  loss: 2.172  acc: 0.423 
epoch: 24  loss: 2.150  acc: 0.435 
epoch: 25  loss: 2.207  acc: 0.414 
epoch: 26  loss: 2.140  acc: 0.441 
epoch: 27  loss: 2.199  acc: 0.423 
epoch: 28  loss: 2.125  acc: 0.453 
e