# Parameter sharing with Explicit Recursion

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import sys 
import os
import time

from models.cifar100 import resnet
import utils

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
### Model FLOPs & Param Test

In [9]:
from ptflops import get_model_complexity_info

# Measure the MACs and parameter count of a checkpointed ResNet34_SingleShared.
# NOTE: the original line chained two assignments
# (`model = resnet.ResNet34_SingleSharedmodel = resnet.ResNet34_SingleShared`),
# which also created a stray `ResNet34_SingleSharedmodel` attribute on the
# `resnet` module. Only `model` is meant to be bound here.
model = resnet.ResNet34_SingleShared
file_weight = './checkpoint/CIFAR100-ResNet34_SingleShared-S32-U1-L10.0-phase5-3.pth'

testloader = utils.get_testdata('CIFAR100',"./data",batch_size=256)

with torch.cuda.device(0):
    # Positional arguments are (shared_rank, unique_rank), matching the
    # `model(shared_rank, unique_rank)` call used when comparing checkpoints.
    net = model(32, 1)
    net = net.to('cuda')
    # map_location makes the load independent of the device the checkpoint
    # was saved from; inside this context 'cuda' resolves to device 0.
    checkpoint = torch.load(file_weight, map_location='cuda')
    net.load_state_dict(checkpoint['net_state_dict'])

    # (3, 32, 32) is the per-sample input resolution handed to ptflops.
    macs, params = get_model_complexity_info(net, (3, 32, 32), as_strings=True,
                                             print_per_layer_stat=True, verbose=False)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

fine=True, track_running_stats=True)
      (coeff_conv2): Conv2d(0.002 M, 0.027% Params, 0.002 GMac, 0.279% MACs, 33, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(0.0 M, 0.002% Params, 0.0 GMac, 0.017% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )
    )
  )
  (shared_basis_2): Conv2d(0.074 M, 0.926% Params, 0.113 GMac, 14.608% MACs, 128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer2): SkippableSequential(
    0.527 M, 6.619% Params, 0.416 GMac, 53.630% MACs, 
    (0): BasicBlock(
      0.23 M, 2.891% Params, 0.059 GMac, 7.600% MACs, 
      (conv1): Conv2d(0.074 M, 0.926% Params, 0.019 GMac, 2.435% MACs, 64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(0.0 M, 0.003% Params, 0.0 GMac, 0.008% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2

In [2]:
# Architecture and the two checkpoints to compare; judging by the file names,
# one was saved without fine-tuning and one after it (TODO confirm).
model = resnet.ResNet34_SingleShared
file_weight = './checkpoint/CIFAR100-ResNet34_SingleShared-S32-U1-L10.0-nofinetuned-1.pth'
file_weight2 = './checkpoint/CIFAR100-ResNet34_SingleShared-S32-U1-L10.0-1.pth'
shared_rank = 32
unique_rank = 1

testloader = utils.get_testdata('CIFAR100', "./data", batch_size=256)

device = 'cuda'


def _load_net(weight_path):
    """Build a fresh model on `device` and restore its weights from `weight_path`.

    Args:
        weight_path: path to a checkpoint whose 'net_state_dict' entry holds
            the model's state dict.

    Returns:
        A `(net, checkpoint)` tuple: the restored model and the raw checkpoint
        object returned by `torch.load`.

    Raises:
        FileNotFoundError: if `weight_path` does not exist.
    """
    loaded_net = model(shared_rank, unique_rank).to(device)
    # map_location keeps loading independent of whichever device the
    # checkpoint tensors were saved from.
    loaded_ckpt = torch.load(weight_path, map_location=device)
    loaded_net.load_state_dict(loaded_ckpt['net_state_dict'])
    return loaded_net, loaded_ckpt


# `checkpoint` ends up bound to the second file's checkpoint, as before.
net, checkpoint = _load_net(file_weight)
net2, checkpoint = _load_net(file_weight2)


FileNotFoundError: [Errno 2] No such file or directory: './checkpoint/CIFAR100-ResNet34_SingleShared-S32-U1-L10.0-nofinetuned-1.pth'

In [25]:
# Element-wise equality of one coefficient conv (layer3, block 1) across the
# two loaded networks.
coeff_first = net.layer3[1].coeff_conv1.weight.data
coeff_second = net2.layer3[1].coeff_conv1.weight.data
print(torch.equal(coeff_first, coeff_second))

# Same comparison for the `fc_skip` weights.
fc_skip_first = net.fc_skip.weight.data
fc_skip_second = net2.fc_skip.weight.data
print(torch.equal(fc_skip_first, fc_skip_second))

True
True


In [22]:
# Dump every parameter tensor registered on layer3 / block 3 / coeff_conv1 of net2.
inspected_conv = net2.layer3[3].coeff_conv1
for coeff_param in inspected_conv.parameters():
    print(coeff_param)

Parameter containing:
tensor([[[[-8.6031e-04]],

         [[ 2.7997e-03]],

         [[-1.4403e-03]],

         ...,

         [[ 2.5615e-03]],

         [[-2.6530e-03]],

         [[ 2.0235e-03]]],


        [[[ 6.0741e-03]],

         [[-2.3647e-03]],

         [[-5.7773e-03]],

         ...,

         [[-1.1535e-03]],

         [[ 2.6554e-05]],

         [[ 2.8954e-03]]],


        [[[ 1.5489e-03]],

         [[ 2.9847e-03]],

         [[-1.0291e-03]],

         ...,

         [[-6.0833e-04]],

         [[ 8.0511e-04]],

         [[ 1.7967e-04]]],


        ...,


        [[[-7.5172e-04]],

         [[-2.3077e-04]],

         [[-6.0648e-03]],

         ...,

         [[ 2.9002e-03]],

         [[-4.2223e-03]],

         [[ 2.8131e-04]]],


        [[[ 7.7262e-04]],

         [[ 1.2863e-03]],

         [[ 1.1007e-03]],

         ...,

         [[-1.7288e-03]],

         [[ 2.7387e-04]],

         [[-6.2827e-04]]],


        [[[-3.2858e-03]],

         [[ 9.5407e-04]],

         [[ 3.

### Freeze the parameters of the scaled-down model

In [None]:
import copy

# Keep an independent copy of layer1 / block 1 / coeff_conv1 weights in `XX`
# (presumably so a later cell can check whether they changed; TODO confirm).
reference_weight = net.layer1[1].coeff_conv1.weight
XX = copy.deepcopy(reference_weight.data)

# Sanity check: the copy matches the live weights right after it is taken.
print(torch.equal(XX, reference_weight.data))

In [None]:
# Step 1: start from a fully frozen network.
for frozen_param in net.parameters():
    frozen_param.requires_grad = False

# Step 2: re-enable gradients for the coefficient convs in the second half of
# every stage (layer1..layer4) -- per the original note, the parameters used
# only by the high-performance model.
# Index 0 of `num_blocks` is a placeholder so that num_blocks[i] lines up with
# the attribute name "layer<i>".
num_blocks = [0, 3, 4, 6, 3]
for i in range(1, 5):
    layer = getattr(net, "layer" + str(i))
    first_trainable = num_blocks[i] // 2  # blocks before this index stay frozen
    for j in range(first_trainable, num_blocks[i]):
        print("layer: %s, block: %s" % (i, j))
        for coeff_conv in (layer[j].coeff_conv1, layer[j].coeff_conv2):
            coeff_conv.weight.requires_grad = True

# Step 3: the final classifier `fc` is trainable as well.
for head_param in (net.fc.weight, net.fc.bias):
    head_param.requires_grad = True

In [None]:
# Spot-check the gradient flag on layer1 / block 1 / coeff_conv1.
spot_check_weight = net.layer1[1].coeff_conv1.weight
print(spot_check_weight.requires_grad)