In [24]:
import torch
import tensorly as tl
from tensorly.decomposition import partial_tucker
from tltorch import FactorizedConv
from torchvision.models import vgg11
import tltorch
import numpy as np
import sys
# Extend the import path with '../src/' (relative to the notebook's working
# directory); presumably this is where the project-local `models` and `utils`
# packages imported in the next cell live — confirm against the repo layout.
sys.path.append('../src/')
# Select PyTorch as the TensorLy backend.
tl.set_backend('pytorch')

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
from models import vgg
from utils import factorizations

In [26]:
def count_params(net: torch.nn.Module) -> int:
    """Count the trainable parameters of a module.

    Args:
        net: Module whose parameters are inspected (via ``net.parameters()``).

    Returns:
        Total number of scalar elements across all parameters that have
        ``requires_grad=True``. Frozen parameters are not counted. Returns 0
        for a module without trainable parameters.
    """
    return sum(p.numel() for p in net.parameters() if p.requires_grad)

In [54]:
# Build the baseline network: torchvision's VGG11 with pretrained weights, with
# the final classifier layer swapped for a fresh NUM_CLASSES-way linear head.
# (Alternative, not used here: the project-local `vgg.VGG('VGG11')`.)
NUM_CLASSES = 10

# Name the pretrained weights explicitly. Passing a boolean (`weights=True`) is
# the deprecated legacy form and only triggers a warning before resolving to
# these same weights.
model = vgg11(weights='IMAGENET1K_V1')

# classifier[6] is the last Linear layer of torchvision's VGG; keep its input
# width and replace it with a newly initialised layer.
input_lastLayer = model.classifier[6].in_features
model.classifier[6] = torch.nn.Linear(input_lastLayer, NUM_CLASSES)



In [55]:
# Baseline size: trainable parameter count of the model before factorization.
# `n_param` is reused by the summary cell at the end of the notebook.
n_param = count_params(model)
print(f'Number of parameters (before): {n_param}')

Number of parameters (before): 128807306


In [56]:
# Show the module tree (repr) of the baseline model for reference.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [57]:
import copy

# --- Factorization settings --------------------------------------------------
factorization = 'tucker'          # tensor format handed to tltorch
rank = 0.75                       # forwarded as `rank=`; None -> per-layer fallback for convs
vbmf = False                      # True -> ranks come from factorizations.estimate_ranks
decompose_weights = True          # forwarded to FactorizedConv.from_conv
td_init = not decompose_weights   # flag: re-initialise factorized conv weights?
# Std used when `td_init` is set. It is kept separate from the flag because
# passing the boolean as the std would silently mean std=1.0.
# NOTE(review): 0.02 is a conventional small init std — tune if this path is used.
td_init_std = 0.02

decomposition_kwargs = {'init': 'random'} if factorization == 'cp' else {}
# For Tucker, 'spatial' is forwarded as `fixed_rank_modes`.
fixed_rank_modes = 'spatial' if factorization == 'tucker' else None

# All replacements go into a deep copy so `model` stays intact for comparison.
fact_model = copy.deepcopy(model)


def _conv_rank(conv):
    """Choose the `rank` argument for factorizing one Conv2d layer."""
    if vbmf:
        return factorizations.estimate_ranks(conv)
    if rank is not None:
        return rank
    # Fallback: a third of the first two weight dims, last two dims kept as-is.
    shape = conv.weight.shape
    return [shape[0] // 3, shape[1] // 3, shape[2], shape[3]]


def _replace_submodule(root, qualified_name, new_module):
    """Replace the submodule at dotted path `qualified_name` under `root`."""
    parent_name, _, child_name = qualified_name.rpartition('.')
    # get_submodule('') returns `root` itself, so top-level children work too,
    # as do paths nested more than one level deep.
    parent = root.get_submodule(parent_name)
    setattr(parent, child_name, new_module)


# Iterate over the original model (never mutated here) and write each
# factorized layer into the same path of the copy.
for name, module in model.named_modules():
    # Exact type checks on purpose: subclasses of Conv2d/Linear are left alone.
    if type(module) is torch.nn.Conv2d:
        if name == 'features.0':
            # Skip first layer
            continue
        print(f'factorizing: {name}')

        fact_layer = tltorch.FactorizedConv.from_conv(
            module,
            rank=_conv_rank(module),
            decompose_weights=decompose_weights,
            factorization=factorization,
            fixed_rank_modes=fixed_rank_modes,
            decomposition_kwargs=decomposition_kwargs,
        )

        if td_init:
            fact_layer.weight.normal_(0, td_init_std)
    elif type(module) is torch.nn.Linear:
        print(f'factorizing: {name}')
        fact_layer = tltorch.FactorizedLinear.from_linear(
            module,
            n_tensorized_modes=3,
            rank=rank,
            factorization=factorization,
            decomposition_kwargs=decomposition_kwargs,
        )
    else:
        continue

    _replace_submodule(fact_model, name, fact_layer)

n_param_fact = count_params(fact_model)
print(f'Number of parameters (after): {n_param_fact}')

factorizing: features.3
factorizing: features.6
factorizing: features.8
factorizing: features.11
factorizing: features.13
factorizing: features.16
factorizing: features.18
factorizing: classifier.0
factorizing: classifier.3
factorizing: classifier.6
Number of parameters (after): 90791421


In [58]:
# Show the module tree (repr) of the factorized copy; replaced layers print
# their factorized type and rank.
fact_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): FactorizedConv(
      in_channels=64, out_channels=128, kernel_size=(3, 3), rank=(94, 47, 3, 3), order=2, padding=[1, 1], 
      (weight): TuckerTensor(shape=(128, 64, 3, 3), rank=(94, 47, 3, 3))
    )
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): FactorizedConv(
      in_channels=128, out_channels=256, kernel_size=(3, 3), rank=(189, 94, 3, 3), order=2, padding=[1, 1], 
      (weight): TuckerTensor(shape=(256, 128, 3, 3), rank=(189, 94, 3, 3))
    )
    (7): ReLU(inplace=True)
    (8): FactorizedConv(
      in_channels=256, out_channels=256, kernel_size=(3, 3), rank=(195, 195, 3, 3), order=2, padding=[1, 1], 
      (weight): TuckerTensor(shape=(256, 256, 3, 3), rank=(195, 195, 3, 3))
    )


In [59]:
# Summarise the size change: absolute counts, the difference, and the ratio of
# original to factorized trainable parameters.
params_removed = n_param - n_param_fact
compression_ratio = n_param / n_param_fact

print(
    f'original number of parameters: {n_param}',
    f'factorized number of parameters: {n_param_fact}',
    f'before - after: {params_removed}',
    f'compression ratio: {compression_ratio:.2f}',
    sep='\n',
)

original number of parameters: 128807306
factorized number of parameters: 90791421
before - after: 38015885
compression ratio: 1.42
