In [1]:
%matplotlib  inline

In [2]:
import os
import sys
import pickle
import torch
import time
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
#import basic_cnn
from collections import OrderedDict #if import basic_cnn, this line can be dropped
from utils import AverageMeter, accuracy, get_margin

#os.environ['CUDA_VISIBLE_DEVICES'] = '1'

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

use_gpu = torch.cuda.is_available()

In [28]:
geom_tight_frame = torch.empty(18,3,3,dtype=torch.float)

In [29]:
geom_tight_frame[0,:,:]=torch.tensor([[1,2,1],[2,4,2],[1,2,1]],dtype=torch.float)/16
geom_tight_frame[1,:,:]=torch.tensor([[1,0,-1],[2,0,-2],[1,0,-1]],dtype=torch.float)/16
geom_tight_frame[2,:,:]=torch.tensor([[1,2,1],[0,0,0],[-1,-2,-1]],dtype=torch.float)/16
geom_tight_frame[3,:,:]=torch.tensor([[1,1,0],[1,0,-1],[0,-1,-1]],dtype=torch.float)*np.sqrt(2)/16
geom_tight_frame[4,:,:]=torch.tensor([[0,1,1],[-1,0,1],[-1,-1,0]],dtype=torch.float)*np.sqrt(2)/16
geom_tight_frame[5,:,:]=torch.tensor([[1,0,-1],[0,0,0],[-1,0,1]],dtype=torch.float)*np.sqrt(7)/24
geom_tight_frame[6,:,:]=torch.tensor([[-1,2,-1],[-2,4,-2],[-1,2,-1]],dtype=torch.float)/48
geom_tight_frame[7,:,:]=torch.tensor([[-1,-2,-1],[2,4,2],[-1,-2,-1]],dtype=torch.float)/48
geom_tight_frame[8,:,:]=torch.tensor([[0,0,-1],[0,2,0],[-1,0,0]],dtype=torch.float)/12
geom_tight_frame[9,:,:]=torch.tensor([[-1,0,0],[0,2,0],[0,0,-1]],dtype=torch.float)/12
geom_tight_frame[10,:,:]=torch.tensor([[0,1,0],[-1,0,-1],[0,1,0]],dtype=torch.float)*np.sqrt(2)/12
geom_tight_frame[11,:,:]=torch.tensor([[-1,0,1],[2,0,-2],[-1,0,1]],dtype=torch.float)*np.sqrt(2)/16
geom_tight_frame[12,:,:]=torch.tensor([[-1,2,-1],[0,0,0],[1,-2,1]],dtype=torch.float)*np.sqrt(2)/16
geom_tight_frame[13,:,:]=torch.tensor([[1,-2,1],[-2,4,-2],[1,-2,1]],dtype=torch.float)/48
geom_tight_frame[14,:,:]=torch.tensor([[0,0,0],[-1,2,-1],[0,0,0]],dtype=torch.float)*np.sqrt(2)/12
geom_tight_frame[15,:,:]=torch.tensor([[-1,2,-1],[0,0,0],[-1,2,-1]],dtype=torch.float)*np.sqrt(2)/24
geom_tight_frame[16,:,:]=torch.tensor([[0,-1,0],[0,2,0],[0,-1,0]],dtype=torch.float)*np.sqrt(2)/12
geom_tight_frame[17,:,:]=torch.tensor([[-1,0,-1],[2,0,2],[-1,0,-1]],dtype=torch.float)*np.sqrt(2)/24

In [31]:
geom_tight_frame = geom_tight_frame.reshape([18,1,1,3,3])
print('tight frame tensor: ',geom_tight_frame.size())

tight frame tensor:  torch.Size([18, 1, 1, 3, 3])


In [36]:
print(geom_tight_frame[0,0,0,:,:])

tensor([[ 0.0625,  0.1250,  0.0625],
        [ 0.1250,  0.2500,  0.1250],
        [ 0.0625,  0.1250,  0.0625]])


In [23]:
# How to construct a 5-layer CNN where each convolutional layer is a linear combination of geometric tight frames above?
class CNN_frame(nn.Module):
    def __init__(self, channels, filters, output_size, with_bn=True):
        super(CNN_frame, self).__init__()
        self.with_bn = with_bn
        self.features = self._make_layers(channels)
        self.classifier = nn.Linear(channels, output_size)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, channels):
        layers = []
        in_channels = 1 #in_channels = 3
        for i in range(1): #for i in range(5):
            if i == 0:
                if self.with_bn:
                    layers += [('conv%dB' % i, nn.Conv3d(in_channels, filters, [1,3,3], stride=2, padding=[0,1,1])),
                               ('conv%dW' % i, nn.Conv3d(filters, channels, [3,1,1], stride=1, padding=0)),
                               ('bn%d' % i, nn.BatchNorm2d(channels)),
                               ('relu%d' % i, nn.ReLU(inplace=True))]
                else:
                    layers += [('conv%dB' % i, nn.Conv3d(in_channels, filters, [1,3,3], stride=2, padding=[0,1,1])),
                               ('conv%dW' % i, nn.Conv3d(filters, channels, [3,1,1], stride=1, padding=0)),#('conv%d' % i, nn.Conv2d(in_channels, channels, 3, 2, 1)),
                               ('relu%d' % i, nn.ReLU(inplace=True))]
            else:
                if self.with_bn:
                    layers += [('conv%dB' % i, nn.Conv3d(in_channels, filters, [1,3,3], stride=2, padding=[0,1,1])),
                               ('conv%dW' % i, nn.Conv3d(filters, channels, [3,1,1], stride=1, padding=0)), #('conv%d' % i, nn.Conv2d(channels, channels, 3, 2, 1)),
                               ('bn%d' % i, nn.BatchNorm2d(channels)),
                               ('relu%d' % i, nn.ReLU(inplace=True))]
                else:
                    layers += [('conv%dB' % i, nn.Conv3d(in_channels, filters, [1,3,3], stride=2, padding=[0,1,1])),
                               ('conv%dW' % i, nn.Conv3d(filters, channels, [3,1,1], stride=1, padding=0)),#('conv%d' % i, nn.Conv2d(channels, channels, 3, 2, 1)),
                               ('relu%d' % i, nn.ReLU(inplace=True))]
        return nn.Sequential(OrderedDict(layers))

In [17]:
# Training with Cross-Entropy Loss
def train_model(model, criterion, optimizer, log_saver, num_epochs=100, margin_dist_ind=[]):
    since = time.time()
    steps = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        margin = []

        for phase in ['train', 'test']:

            loss_meter = AverageMeter()
            acc_meter = AverageMeter()

            if phase == 'train':
                model.train(True)
            else:
                model.train(False)

            for i, data in enumerate(loaders[phase]):
                inputs, labels = data
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                inputs = Variable(inputs)
                labels = Variable(labels)

                optimizer.zero_grad()
                
                # The inputs are of size [batch_size,in_channels,H,W], and is changed to [batch_size,1,in_channels,H,W]
                inputs = inputs.unsqueeze(1)
                
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)

                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    steps += 1
                    margin = np.append(margin, get_margin(outputs, labels))

                loss_meter.update(loss.data.item(), outputs.size(0))
                acc_meter.update(accuracy(outputs.data, labels.data)[-1].item(), outputs.size(0))

            epoch_loss = loss_meter.avg
            epoch_error = 1 - acc_meter.avg / 100

            if phase == 'train':

                log_saver['train_loss'].append(epoch_loss)
                log_saver['train_error'].append(epoch_error)
                log_saver['margin'].append(min(margin))
                #log_saver['margin'].append(margin)
                ww = 1
                for i in range(6):
                    if i <= 4:
                        size = eval('model.features.conv%d.weight.size()' % i)
                        # here to compute the F norm between each CNN layer
                        #w_norm = eval('model.features.conv%d.weight.view(size[0],-1).pow(2).sum(1).mean().data.item()' % i)
                        #model.features.conv0.weight*model.features.bn0.weight[:,None,None,None]/(model.features.bn0.running_var.sqrt()[:,None,None,None]+1e-5)
                        scaled_w = eval('model.features.conv%d.weight*model.features.bn%d.weight[:,None,None,None]/(model.features.bn%d.running_var.sqrt()[:,None,None,None]+1e-5)' % (i,i,i))
                        w_norm = scaled_w.view(size[0],-1).pow(2).sum().sqrt().data.item()

                        log_saver['w%d' % i].append(w_norm)
                    else:
                        w_norm = model.classifier.weight.norm(2).data.item()
                        log_saver['w_fc'].append(w_norm)
                    ww *= w_norm
                log_saver['normalised_margin'].append(log_saver['margin'][-1] / ww)
                if epoch in margin_dist_ind: 
                    log_saver['normalized_margin_dist'].append(margin/ww)
                    print('Normalized Margin Distribution saved.')

            elif phase == 'test':

                log_saver['test_loss'].append(epoch_loss)
                log_saver['test_error'].append(epoch_error)

            print('{} Loss: {:.4f} Error: {:.4f}'.format(
                phase, epoch_loss, epoch_error), end=' ' if phase == 'train' else '\n')
            if phase == 'train':
                print('w4_norm: {:.4f} Margin: {:.4f} Norm_margin: {:.4f}'.format(log_saver['w4'][-1],
                                                                                  log_saver['margin'][-1],
                                                                                  log_saver['normalised_margin'][-1]))

        if epoch % 30 == 0 or epoch == num_epochs - 1:
            print('Saving..')
            state = {
                'net': model,
                'epoch': epoch,
                'log': log_saver
            }

            if not os.path.isdir('checkpoint_CNN'):
                os.mkdir('checkpoint_CNN')
            torch.save(state, './checkpoint_CNN/ckpt_epoch_{}.t7'.format(epoch))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model, log_saver


In [18]:
root = './'
lr = 0.01
BATCH_SIZE = 100
weight_decay = 0.

img_transforms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(
                                         (0.4914, 0.4822, 0.4465),
                                         (0.2023, 0.1994, 0.2010))])

In [19]:
training_dataset = datasets.CIFAR10(root, train=True, transform=img_transforms, download=True)
training_loader = DataLoader(training_dataset, BATCH_SIZE, shuffle=True, pin_memory=True)

testing_dataset = datasets.CIFAR10(root, train=False, transform=img_transforms)
testing_loader = DataLoader(testing_dataset, BATCH_SIZE, shuffle=False, pin_memory=True)

loaders = {'train': training_loader, 'test': testing_loader}

print('training data size:',len(training_dataset))
print('testing data size:',len(testing_dataset))

Files already downloaded and verified
training data size: 50000
testing data size: 10000


In [27]:
phase='test'
for i, data in enumerate(loaders[phase]):
    inputs, labels = data
    if use_gpu:
        inputs = inputs.cuda()
        labels = labels.cuda()
    inputs = Variable(inputs)
    labels = Variable(labels)

print('input size:',inputs.size())
inputs1 = inputs.unsqueeze(1)
print('input size changed:',inputs1.size())
geom_tight_frame = geom_tight_frame.unsqueeze(1)
print('tight frame tensor: ',geom_tight_frame.size())

input size: torch.Size([100, 3, 32, 32])
input size changed: torch.Size([100, 1, 3, 32, 32])
tight frame tensor:  torch.Size([18, 1, 1, 1, 3, 3])


In [24]:
log = {'num_params': [],
       'train_loss': [],
       'train_error': [],
       'test_loss': [],
       'test_error': [],
       'w0': [], 'w1': [], 'w2': [],
       'w3': [], 'w4': [], 'w_fc': [],
       'margin': [], 'normalised_margin': []}

# %% run the model
num_epochs = 100
channels = 20
filters = 18
outputs = 10
# here use with_bn to control batch normalisation
model = CNN_frame(channels, filters, outputs, with_bn=True)

number_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
log['num_params'].append(number_params)

if use_gpu:
    model = model.cuda()

print('number-of-parameters:',log['num_params'])
        
for name1, param1 in model.state_dict().items():
    print('All parameter_name:',name1)
    

for name, param in model.named_parameters(): #for name, param in model.state_dict().items():
    if param.requires_grad:
        print('Gradient updated parameter_name:',name)


number-of-parameters: [1530]
All parameter_name: features.conv0B.weight
All parameter_name: features.conv0B.bias
All parameter_name: features.conv0W.weight
All parameter_name: features.conv0W.bias
All parameter_name: features.bn0.weight
All parameter_name: features.bn0.bias
All parameter_name: features.bn0.running_mean
All parameter_name: features.bn0.running_var
All parameter_name: classifier.weight
All parameter_name: classifier.bias
Gradient updated parameter_name: features.conv0B.weight
Gradient updated parameter_name: features.conv0B.bias
Gradient updated parameter_name: features.conv0W.weight
Gradient updated parameter_name: features.conv0W.bias
Gradient updated parameter_name: features.bn0.weight
Gradient updated parameter_name: features.bn0.bias
Gradient updated parameter_name: classifier.weight
Gradient updated parameter_name: classifier.bias


In [44]:
eval('model.features.conv%dB.weight.data = geom_tight_frame' % 0)

SyntaxError: invalid syntax (<string>, line 1)

In [22]:
print('\n Basis size:', model.features.conv0B.weight.size())
print('\n Weight size:', model.features.conv0W.weight.size())


 Basis size: torch.Size([18, 1, 1, 3, 3])

 Weight size: torch.Size([20, 18, 3, 1, 1])


In [54]:
model.features.conv0B.weight.data = geom_tight_frame
model.features.conv0B.weight.requires_grad = False
model.features.conv0B.bias.data = torch.zeros([18],dtype=torch.float)
model.features.conv0B.bias.requires_grad = False

#print('bias size:', model.features.conv0B.bias.data.size())

print(model.features.conv0B.weight[16,0,0,:,:])
print('Update param: ',model.features.conv0W.weight[0,:,:,0,0])

tensor([[ 0.0000, -0.1179,  0.0000],
        [ 0.0000,  0.2357,  0.0000],
        [ 0.0000, -0.1179,  0.0000]])
Update param:  tensor([[ 0.0809,  0.1136, -0.0007],
        [-0.1312,  0.1008, -0.0868],
        [ 0.0713, -0.0295, -0.1310],
        [-0.0685,  0.0946,  0.1093],
        [-0.1068, -0.0426,  0.1303],
        [-0.1117,  0.0182, -0.0943],
        [-0.0957,  0.1203, -0.1233],
        [-0.0409, -0.0221,  0.1302],
        [ 0.0322,  0.0251,  0.0532],
        [-0.0153,  0.0702,  0.0986],
        [ 0.0910,  0.0878, -0.0403],
        [ 0.0624, -0.0884, -0.1307],
        [-0.0916, -0.1053,  0.0319],
        [ 0.1359,  0.0243,  0.0081],
        [-0.1241,  0.0882, -0.0490],
        [-0.0197,  0.0839, -0.1073],
        [ 0.0778,  0.0890, -0.1154],
        [-0.0541, -0.0547,  0.0075]], device='cuda:0')


In [56]:
print('number-of-parameters:',log['num_params'])
        
for name1, param1 in model.state_dict().items():
    print('All parameter_name:',name1)
    

for name, param in model.named_parameters(): #for name, param in model.state_dict().items():
    if param.requires_grad:
        print('Gradient updated parameter_name:',name)

number-of-parameters: [1530]
All parameter_name: features.conv0B.weight
All parameter_name: features.conv0B.bias
All parameter_name: features.conv0W.weight
All parameter_name: features.conv0W.bias
All parameter_name: features.bn0.weight
All parameter_name: features.bn0.bias
All parameter_name: features.bn0.running_mean
All parameter_name: features.bn0.running_var
All parameter_name: classifier.weight
All parameter_name: classifier.bias
Gradient updated parameter_name: features.conv0W.weight
Gradient updated parameter_name: features.conv0W.bias
Gradient updated parameter_name: features.bn0.weight
Gradient updated parameter_name: features.bn0.bias
Gradient updated parameter_name: classifier.weight
Gradient updated parameter_name: classifier.bias


In [55]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

model, log = train_model(model, criterion, optimizer, log, num_epochs=1)


ValueError: optimizing a parameter that doesn't require gradients