In [8]:
import numpy as np
np.set_printoptions(precision=1)
# import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
# from modules.cnn_with_spectral_parameterization import CNN_Spectral_Param
# from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool
from modules.image_generator import ImageGenerator

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module

% matplotlib inline
% load_ext autoreload
% autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# In the interest of training time, we only used 1 of 5 cifar10 batches
# The important part of the experiment is to compare the rates of convergence of training accuracy,
# so subsetting the training dataset for both spectral and spatial models shouldn't impact
# the relationship between their train accuracy convergences
xtrain, ytrain, xtest, ytest = load_cifar10(1, channels_last=True)

file already downloaded..
getting batch 1


In [3]:
xtrain.shape, ytrain.shape, xtest.shape, ytest.shape

((10000, 32, 32, 3), (10000,), (10000, 32, 32, 3), (10000,))

# 1. Rewrite tensorflow model to pytorch

In [12]:
class generic_Net(nn.Module):
    def __init__(self, kernel_size):
        super(generic_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 8 * 8 * 192)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [5]:
kernel_size = 3
batch_size = 200
learning_rate = 1e-5
l2norm = 0.01
total_epoch = 20

if __name__ == '__main__':
    net = generic_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1: # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.749  acc: 0.163 
epoch: 2  loss: 2.645  acc: 0.210 
epoch: 3  loss: 2.575  acc: 0.209 
epoch: 4  loss: 2.837  acc: 0.176 
epoch: 5  loss: 2.747  acc: 0.171 
epoch: 6  loss: 2.645  acc: 0.178 
epoch: 7  loss: 2.666  acc: 0.143 
epoch: 8  loss: 2.645  acc: 0.132 
epoch: 9  loss: 2.592  acc: 0.220 
epoch: 10  loss: 2.503  acc: 0.271 
epoch: 11  loss: 2.414  acc: 0.272 
epoch: 12  loss: 2.512  acc: 0.257 
epoch: 13  loss: 2.627  acc: 0.245 
epoch: 14  loss: 2.762  acc: 0.196 
epoch: 15  loss: 2.507  acc: 0.236 
epoch: 16  loss: 2.519  acc: 0.251 
epoch: 17  loss: 2.558  acc: 0.234 
epoch: 18  loss: 2.563  acc: 0.226 
epoch: 19  loss: 2.540  acc: 0.257 
epoch: 20  loss: 2.495  acc: 0.276 


# 2. Spectral parameterization

In [21]:
import pytorch_fft.fft.autograd as fft

class SpectralParam(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
        super(SpectralParam, self).__init__()
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        
        self.fft = fft.Fft2d()
        
        self.weight_re = nn.Parameter(torch.Tensor(out_channels, in_channels, kernel_size, kernel_size), requires_grad=True)
        nn.init.xavier_uniform(self.weight_re)
#         self.weight_im = nn.Parameter(torch.Tensor(out_channels, in_channels, kernel_size, kernel_size), requires_grad=True)
#         nn.init.xavier_uniform(self.weight_im)
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels), requires_grad=True)
            nn.init.normal(self.bias)
        else:
            self.bias = None
            self.register_parameter('bias', None)
        
    def forward(self, input):
#         weight, _ = self.fft(self.weight_re, self.weight_im)
        weight, _ = self.fft(self.weight_re, torch.zeros_like(self.weight_re).cuda())
        result = F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        
        return result

### test layer

In [22]:
A = Variable(torch.arange(16).view(1, 1, 4, 4).cuda(), requires_grad=True)
model = SpectralParam(1, 3, 2).cuda()

B = model(A)
print(B)
C = torch.sum(B * B)
C.backward()
print(A.grad, model.weight_re.grad)

Variable containing:
(0 ,0 ,.,.) = 
   4.6083   5.4162   6.2241
   7.8398   8.6477   9.4556
  11.0713  11.8792  12.6870

(0 ,1 ,.,.) = 
   5.4215   5.9661   6.5107
   7.6000   8.1447   8.6893
   9.7786  10.3232  10.8678

(0 ,2 ,.,.) = 
   0.4238   1.9521   3.4805
   6.5372   8.0656   9.5940
  12.6507  14.1791  15.7075
[torch.cuda.FloatTensor of size 1x3x3x3 (GPU 0)]

Variable containing:
(0 ,0 ,.,.) = 
  -0.6970  -7.8828  -5.4189 -10.4712
  17.3574  30.4579  37.0283   8.3742
  41.1802  56.7395  63.3100  10.8331
  32.8472  61.3373  65.4439  27.0493
[torch.cuda.FloatTensor of size 1x1x4x4 (GPU 0)]
 Variable containing:
(0 ,0 ,.,.) = 
  5.3290e+03 -3.1132e+02
 -1.2453e+03 -1.2207e-04

(1 ,0 ,.,.) = 
  4.8425e+03 -2.9321e+02
 -1.1728e+03 -2.4414e-04

(2 ,0 ,.,.) = 
  5.6026e+03 -2.9036e+02
 -1.1614e+03  1.2207e-04
[torch.cuda.FloatTensor of size 3x1x2x2 (GPU 0)]



In [23]:
class generic_Net(nn.Module):
    def __init__(self, kernel_size):
        super(generic_Net, self).__init__()
        self.conv1 = SpectralParam(3, 96, kernel_size, padding=(kernel_size-1)//2)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = SpectralParam(96, 192, kernel_size, padding=(kernel_size-1)//2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(8*8*192, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 8 * 8 * 192)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [18]:
kernel_size = 3
batch_size = 200
learning_rate = 1e-5
l2norm = 0.01
total_epoch = 100

if __name__ == '__main__':
    net = generic_Net(kernel_size).cuda()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    
    img_gen = ImageGenerator(xtrain, ytrain)
    
    generator = img_gen.next_batch_gen(batch_size)
    iters = int(xtrain.shape[0] / batch_size)
    
    itr = 0
    for epoch in range(total_epoch):
        
        loss_iter = []
        acc_iter = []
        for itr in range(iters):
            itr += 1
            
            X_batch, y_batch = next(generator)
            inputs = Variable(torch.Tensor(X_batch.transpose(0,3,1,2)).cuda())
            labels = Variable(torch.LongTensor(y_batch).cuda())
            
            outputs = net.forward(inputs)
            loss = criterion(outputs, labels)
            
            t = -1
            for w in net.parameters():
                t += 1
                if t % 2 == 1: # because parameters contain both bias and weights, only need weights here
                    continue
                loss += w.norm(2) * l2norm
            
            loss.backward()
            optimizer.step()
            
            _, predict = torch.max(outputs.data, 1)
            
            loss_iter.append(loss.data.cpu().numpy()[0])
            acc_iter.append(predict.eq(labels.data).cpu().sum() / batch_size)
        
        ave_loss = np.mean(loss_iter)
        ave_acc = np.mean(acc_iter)
        print('epoch: %d  loss: %.3f  acc: %.3f ' % (epoch + 1, ave_loss, ave_acc))

epoch: 1  loss: 2.889  acc: 0.107 
epoch: 2  loss: 2.888  acc: 0.142 
epoch: 3  loss: 2.850  acc: 0.133 
epoch: 4  loss: 2.837  acc: 0.143 
epoch: 5  loss: 2.792  acc: 0.143 
epoch: 6  loss: 2.836  acc: 0.179 
epoch: 7  loss: 2.747  acc: 0.167 
epoch: 8  loss: 2.694  acc: 0.183 
epoch: 9  loss: 2.648  acc: 0.209 
epoch: 10  loss: 2.579  acc: 0.234 
epoch: 11  loss: 2.518  acc: 0.272 
epoch: 12  loss: 2.466  acc: 0.278 
epoch: 13  loss: 2.413  acc: 0.309 
epoch: 14  loss: 2.380  acc: 0.312 
epoch: 15  loss: 2.391  acc: 0.312 
epoch: 16  loss: 2.423  acc: 0.303 
epoch: 17  loss: 2.342  acc: 0.329 
epoch: 18  loss: 2.458  acc: 0.293 
epoch: 19  loss: 2.329  acc: 0.331 
epoch: 20  loss: 2.374  acc: 0.312 
epoch: 21  loss: 2.275  acc: 0.376 
epoch: 22  loss: 2.275  acc: 0.358 
epoch: 23  loss: 2.254  acc: 0.370 
epoch: 24  loss: 2.214  acc: 0.393 
epoch: 25  loss: 2.257  acc: 0.380 
epoch: 26  loss: 2.176  acc: 0.411 
epoch: 27  loss: 2.209  acc: 0.404 
epoch: 28  loss: 2.220  acc: 0.403 
e