# Parameter sharing with Explicit Recursion

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import sys 
import os
import time

from models.cifar100 import resnet
import utils

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

### Model Test

In [2]:
# Network constructor under test; it is called below with the two rank values.
model = resnet.ResNet34_SingleShared
# Checkpoint path for this configuration. It is only defined in this cell; the
# "S32-U1" part of the file name matches the ranks set below.
file_weight = './checkpoint/CIFAR100-ResNet34_SingleShared-S32-U1-L10-22.20err.pth'

# Positional constructor arguments of the model (shared rank, unique rank).
shared_rank = 32
unique_rank = 1

# NOTE(review): presumably returns the CIFAR-100 test DataLoader rooted at
# ./data -- confirm against utils.get_testdata.
testloader = utils.get_testdata('CIFAR100', "./data", batch_size=256)

# Prefer the GPU, but fall back to the CPU so this cell does not raise on a
# machine without CUDA (the original hard-coded 'cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

net = model(shared_rank, unique_rank)
net = net.to(device)

# Dump the module tree to verify which blocks carry shared/unique parameters.
print(net)

ResNet_SingleShared(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (shared_basis_1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): SkippableSequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock_SingleShared(
      (shared_basis): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (basis_conv1): Conv2d(64, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (ba

### Freeze the parameters of the scaled-down model

In [None]:
import copy

# Take an independent snapshot (XX) of the coeff_conv1 weights of layer1's
# block 1, so the live tensor can be compared against it.
live_weight = net.layer1[1].coeff_conv1.weight.data
XX = copy.deepcopy(live_weight)

# Immediately after the copy the two tensors are expected to compare equal.
print(torch.equal(XX, live_weight))

In [None]:
# List the names of all entries in the model's state dict.
# (The original line was missing the call parentheses and the closing paren.)
print(net.state_dict().keys())

In [None]:
# Step 1: turn off gradients for every parameter of the network.
for frozen_param in net.parameters():
    frozen_param.requires_grad = False

# Step 2: turn gradients back on only for the coefficient weights of the
# trailing blocks in each of net.layer1 .. net.layer4.
# num_blocks is indexed by the 1-based layer number; entry 0 is a placeholder.
# NOTE(review): the values are presumably the block counts per layer -- confirm
# against the model definition.
num_blocks = [0, 3, 4, 6, 3]
for stage in range(1, 5):
    layer = getattr(net, "layer%d" % stage)
    total_blocks = num_blocks[stage]
    # Blocks 0 .. total_blocks // 2 stay frozen; the rest are unfrozen.
    first_unfrozen = total_blocks // 2 + 1
    for block_idx in range(first_unfrozen, total_blocks):
        print("layer: %s, block: %s" % (stage, block_idx))
        block = layer[block_idx]
        for coeff in (block.coeff_conv1, block.coeff_conv2):
            coeff.weight.requires_grad = True

# The fc_scale layer is trained as well (both weight and bias).
for head_param in (net.fc_scale.weight, net.fc_scale.bias):
    head_param.requires_grad = True

In [None]:
# Spot check: show whether the coeff_conv1 weight of layer1's block 2 is
# currently marked as trainable.
probe_weight = net.layer1[2].coeff_conv1.weight
print(probe_weight.requires_grad)