<a href="https://colab.research.google.com/github/zhihong1224/CNN_Demo/blob/master/hw7_Network_Compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1 Architecture Design 

## 1.1 不同的卷积操作

##### 一般的卷积神经网络:
nn.Conv2d(in_chs,out_chs,kernel_size,stride,padding) 

##### Group Convolution，其中in_chs和out_chs必须能整除groups:
nn.Conv2d(in_chs,out_chs,kernel_size,stride,padding,groups=groups)

##### Depthwise Convolution，其中in_chs=out_chs=groups:
nn.Conv2d(in_chs,out_chs=in_chs,kernel_size,stride,padding,groups=in_chs)

##### Pointwise Convolution，其中kernel_size=1:
nn.Conv2dd(in_chs,out_chs,1)

## 1.2 数据

In [3]:
# Download dataset
!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip
# Unzip the files
!unzip food-11.zip

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  inflating: food-11/training/4_165.jpg  
  inflating: food-11/training/5_376.jpg  
  inflating: food-11/training/2_691.jpg  
  inflating: food-11/training/0_541.jpg  
  inflating: food-11/training/3_482.jpg  
  inflating: food-11/training/0_227.jpg  
  inflating: food-11/training/5_410.jpg  
  inflating: food-11/training/4_603.jpg  
  inflating: food-11/training/8_341.jpg  
  inflating: food-11/training/5_1154.jpg  
  inflating: food-11/training/9_37.jpg  
  inflating: food-11/training/9_152.jpg  
  inflating: food-11/training/5_438.jpg  
  inflating: food-11/training/9_1287.jpg  
  inflating: food-11/training/8_369.jpg  
  inflating: food-11/training/2_1455.jpg  
  inflating: food-11/training/10_247.jpg  
  inflating: food-11/training/7_32.jpg  
  inflating: food-11/training/10_521.jpg  
  inflating: food-11/training/2_1333.jpg  
  inflating: food-11/training/2_861.jpg  
  inflating: food-11/training/0_569.jpg  
  inflating: food-11/training/6

In [4]:
from PIL import Image
from glob import glob
train_root='./food-11/testing'
train_imgs=glob(train_root+'/*.jpg')
print(len(train_imgs))

3347


In [5]:
if '_' in train_imgs[0].split('/')[-1]:
  print('ok')

In [6]:
import  numpy as np
img=np.array(Image.open(train_imgs[10]))

In [7]:
img.shape

(384, 512, 3)

In [33]:
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms

class Get_dataset(Dataset):
  def __init__(self,root):
    self.imgs=glob(root+'/*.jpg')
    self.train_transform=transforms.Compose([
      transforms.Resize((128,128)),
      transforms.RandomRotation(15),
      transforms.ToTensor()
    ])
    self.test_transform=transforms.Compose([
      transforms.Resize((128,128)),
      transforms.ToTensor()
    ])
  def __getitem__(self,item):
    img_file=self.imgs[item]
    imgname=img_file.split('/')[-1]
    if '_' in imgname:
      img=self.train_transform(Image.open(img_file))
      label=int(imgname.split('_')[0])
      return img,label
    else:
      img=self.test_transform(Image.open(img_file))
      return img
  def __len__(self):
    return len(self.imgs)

In [9]:
train_root='./food-11/training'
val_root='./food-11/validation'
test_root='./food-11/testing'

train_dataset=Get_dataset(train_root)
val_dataset=Get_dataset(val_root)
test_dataset=Get_dataset(test_root)

In [10]:
train_dataset[0][0].shape,train_dataset[0][1]
print(len(train_dataset),len(val_dataset),len(test_dataset))

9866 3430 3347


In [67]:
batch_size=128
train_iter=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=4)
val_iter=DataLoader(val_dataset,batch_size=batch_size,shuffle=False,num_workers=4)
test_iter=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=4)

In [62]:
for X,Y in train_iter:
  print(X.shape,Y.shape)
  break

torch.Size([128, 3, 128, 128]) torch.Size([128])


## 1.3 Model

In [51]:
import torch
from torch import nn,optim
import torch.nn.functional as F

def convblock(in_chs,out_chs,kernel_size=3,stride=1,padding=1,down=True):
  layers=[
      nn.Conv2d(in_chs,in_chs,kernel_size,stride,padding,groups=in_chs),
      nn.BatchNorm2d(in_chs),
      nn.ReLU6(),
      nn.Conv2d(in_chs,out_chs,1)]
  if down:
    layers.append(nn.MaxPool2d(2,2,0))
  return nn.Sequential(*layers)

class StudentNet(nn.Module):
  def __init__(self,base=16,width_mult=1):
    super().__init__()
    multiplier=[1,2,4,8,16,16,16,16]
    bandwidth=[base*m for m in multiplier]
    
    for i in range(3,7):
      bandwidth[i]=int(bandwidth[i]*width_mult)
    
    layers=[
      nn.Sequential(
        nn.Conv2d(3,bandwidth[0],3,1,1),
        nn.BatchNorm2d(bandwidth[0]),
        nn.ReLU6(),
        nn.MaxPool2d(2,2,0), 
      )
    ]

    for i in range(len(bandwidth)-1):
      if i<3:
        layers.append(convblock(bandwidth[i],bandwidth[i+1]))
      else:
        layers.append(convblock(bandwidth[i],bandwidth[i+1],down=False))
    
    layers.append(nn.AdaptiveAvgPool2d((1,1)))
    
    self.cnn=nn.Sequential(*layers)

    self.fc=nn.Sequential(nn.Linear(bandwidth[7],11))
  def forward(self,x):
    feature=self.cnn(x)  #(batch_size,bandwidth[7],1,1)
    feature=feature.view(feature.shape[0],-1)
    return self.fc(feature) #(batch_size,11)

## 1.4 训练

In [12]:
def train(epochs,lr,model,train_iter,val_iter):
  model=model.to(device)

  criterion=nn.CrossEntropyLoss()
  optimizer=optim.Adam(model.parameters(),lr=lr)

  for epoch in range(epochs):
    train_loss,train_acc,val_loss,val_acc,n_train,n_test=0.,0.,0.,0.,0,0
    model.train()
    for X,Y in train_iter:
      X=X.to(device)
      Y=Y.to(device)
      y_pred=model(X)  #(batch_size,11)
      loss=criterion(y_pred,Y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      train_loss+=loss.item()*Y.shape[0]
      n_train+=Y.shape[0]
      train_acc+=(y_pred.argmax(dim=1)==Y).sum()

    model.eval()
    with torch.no_grad():
      for X,Y in val_iter:
        X=X.to(device)
        Y=Y.to(device)
        y_pred=model(X)
        val_loss+=criterion(y_pred,Y)*Y.shape[0]
        n_test+=Y.shape[0]
        val_acc+=(y_pred.argmax(dim=1)==Y).sum()

    print('epoch:%d | train loss:%.3f,train acc:%.3f | val loss:%.3f,val acc:%.3f'\
         %(epoch+1,train_loss/n_train,train_acc/n_train,val_loss/n_test,val_acc/n_test))


In [73]:
epochs,lr=30,0.003
model=StudentNet()
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train(epochs,lr,model,train_iter,val_iter)

epoch:1 | train loss:1.906,train acc:0.333 | val loss:2.853,val acc:0.233
epoch:2 | train loss:1.551,train acc:0.466 | val loss:2.302,val acc:0.348
epoch:3 | train loss:1.383,train acc:0.518 | val loss:1.457,val acc:0.510
epoch:4 | train loss:1.237,train acc:0.569 | val loss:1.628,val acc:0.488
epoch:5 | train loss:1.131,train acc:0.604 | val loss:1.520,val acc:0.492
epoch:6 | train loss:1.041,train acc:0.650 | val loss:1.618,val acc:0.507
epoch:7 | train loss:0.972,train acc:0.666 | val loss:1.492,val acc:0.537
epoch:8 | train loss:0.918,train acc:0.684 | val loss:1.739,val acc:0.478
epoch:9 | train loss:0.854,train acc:0.704 | val loss:1.532,val acc:0.534
epoch:10 | train loss:0.814,train acc:0.720 | val loss:1.808,val acc:0.502
epoch:11 | train loss:0.811,train acc:0.720 | val loss:1.508,val acc:0.560
epoch:12 | train loss:0.717,train acc:0.752 | val loss:1.662,val acc:0.533
epoch:13 | train loss:0.631,train acc:0.786 | val loss:1.335,val acc:0.604
epoch:14 | train loss:0.606,train 

# 2 Knowledge Distillation

In [13]:
import torchvision.models as models

In [14]:
def loss_fn_kd(outputs,labels,teacher_outputs,T=20,alpha=0.5):
  hard_loss=F.cross_entropy(outputs,labels)*(1.-alpha)
  soft_loss=nn.KLDivLoss(reduction='batchmean')(F.log_softmax(outputs/T,dim=1),F.softmax(teacher_outputs/T,dim=1))*(alpha*T*T)
  return hard_loss+soft_loss

In [15]:
batch_size=32
train_iter=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=4)
val_iter=DataLoader(val_dataset,batch_size=batch_size,shuffle=False,num_workers=4)
test_iter=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=4)

In [16]:
!gdown --id '1B8ljdrxYXJsZv2vmTequdPOofp3VF3NN' --output teacher_resnet18.bin

Downloading...
From: https://drive.google.com/uc?id=1B8ljdrxYXJsZv2vmTequdPOofp3VF3NN
To: /content/teacher_resnet18.bin
0.00B [00:00, ?B/s]4.72MB [00:00, 20.5MB/s]22.0MB [00:00, 27.9MB/s]34.1MB [00:00, 33.7MB/s]44.8MB [00:00, 79.3MB/s]


In [23]:
teacher_net=models.resnet18(pretrained=False,num_classes=11).cuda()
student_net=StudentNet().cuda()
teacher_net.load_state_dict(torch.load(f'./teacher_resnet18.bin'))
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [24]:
def train(teacher_net,student_net,epochs,lr,train_iter,val_iter):
  teacher_net.eval()
  optimizer=optim.AdamW(student_net.parameters(),lr=lr)
  for epoch in range(epochs):
    train_loss,train_acc,train_n=0.0,0.0,0
    val_loss,val_acc,val_n=0.0,0.0,0
    student_net.train()
    for X,Y in train_iter:
      X=X.to(device)
      Y=Y.to(device)
      soft_label=teacher_net(X)
      logits=student_net(X)
      loss=loss_fn_kd(logits,Y,soft_label)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      train_loss+=loss.item()*Y.shape[0]
      train_acc+=(logits.argmax(dim=1)==Y).sum()
      train_n+=Y.shape[0]
    
    student_net.eval()
    with torch.no_grad():
      for X,Y in val_iter:
        X=X.to(device)
        Y=Y.to(device)
        soft_label=teacher_net(X)
        logits=student_net(X)
        loss=loss_fn_kd(logits,Y,soft_label)

        val_loss+=loss.item()*Y.shape[0]
        val_n+=Y.shape[0]
        val_acc+=(logits.argmax(dim=1)==Y).sum()
      
    print('epoch:%d | train loss:%.3f,train acc:%.3f | val loss:%.3f,val acc:%.3f'%\
          (epoch+1,train_loss/train_n,train_acc/train_n,val_loss/val_n,val_acc/val_n))


In [25]:
epochs,lr=30,0.001
train(teacher_net,student_net,epochs,lr,train_iter,val_iter)

epoch:1 | train loss:18.027,train acc:0.260 | val loss:16.771,val acc:0.352
epoch:2 | train loss:16.196,train acc:0.360 | val loss:15.797,val acc:0.385
epoch:3 | train loss:15.034,train acc:0.407 | val loss:15.321,val acc:0.413
epoch:4 | train loss:14.126,train acc:0.442 | val loss:15.296,val acc:0.449
epoch:5 | train loss:13.438,train acc:0.465 | val loss:15.045,val acc:0.470
epoch:6 | train loss:12.988,train acc:0.482 | val loss:15.649,val acc:0.395
epoch:7 | train loss:12.491,train acc:0.490 | val loss:13.984,val acc:0.465
epoch:8 | train loss:12.066,train acc:0.516 | val loss:14.895,val acc:0.420
epoch:9 | train loss:11.645,train acc:0.519 | val loss:13.799,val acc:0.450
epoch:10 | train loss:11.368,train acc:0.521 | val loss:13.939,val acc:0.476
epoch:11 | train loss:10.906,train acc:0.538 | val loss:13.224,val acc:0.493
epoch:12 | train loss:10.654,train acc:0.543 | val loss:13.603,val acc:0.456
epoch:13 | train loss:10.402,train acc:0.549 | val loss:13.687,val acc:0.494
epoch:14

# 3 Network Pruning

In [57]:
def network_slimming(old_model,new_model):
  params=old_model.state_dict()
  new_params=new_model.state_dict()

  selected_idx=[]

  for i in range(8):
    importance=params[f'cnn.{i}.1.weight']
    old_dim=len(importance)
    new_dim=len(new_params[f'cnn.{i}.1.weight'])
    ranking=torch.argsort(importance,descending=True)
    selected_idx.append(ranking[:new_dim])

  now_processed=1
  for (name,p1),(name2,p2) in zip(params.items(),new_params.items()):
    if name.startswith('cnn') and p1.size()!=torch.Size([]) and now_processed!=len(selected_idx):
      if name.startswith(f'cnn.{now_processed}.3'):
        now_processed+=1

      if name.endswith('3.weight'):
        if len(selected_idx)==now_processed:
          new_params[name]=p1[:,selected_idx[now_processed-1]]
        else:
          new_params[name]=p1[selected_idx[now_processed]][:,selected_idx[now_processed-1]]
      else:
        new_params[name]=p1[selected_idx[now_processed]]
    else:
      new_params[name]=p1
  new_model.load_state_dict(new_params)
  return new_model  

In [28]:
class Get_dataset(Dataset):
  def __init__(self,root):
    self.imgs=glob(root+'/*.jpg')
    self.train_transform=transforms.Compose([
      transforms.RandomCrop(256,pad_if_needed=True,padding_mode='symmetric'),
      transforms.RandomHorizontalFlip(),
      transforms.RandomRotation(15),
      transforms.ToTensor()
    ])
    self.test_transform=transforms.Compose([
      transforms.CenterCrop(256),
      transforms.ToTensor()
    ])
  def __getitem__(self,item):
    img_file=self.imgs[item]
    imgname=img_file.split('/')[-1]
    if '_' in imgname:
      img=self.train_transform(Image.open(img_file))
      label=int(imgname.split('_')[0])
      return img,label
    else:
      img=self.test_transform(Image.open(img_file))
      return img
  def __len__(self):
    return len(self.imgs)

In [29]:
train_dataset=Get_dataset(train_root)
val_dataset=Get_dataset(val_root)
test_dataset=Get_dataset(test_root)

In [54]:
batch_size=32
train_iter=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=4)
val_iter=DataLoader(val_dataset,batch_size=batch_size,shuffle=False,num_workers=4)
test_iter=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,num_workers=4)

In [46]:
!gdown --id '12wtIa0WVRcpboQzhgRUJOpcXe23tgWUL' --output student_custom_small.bin

Downloading...
From: https://drive.google.com/uc?id=12wtIa0WVRcpboQzhgRUJOpcXe23tgWUL
To: /content/student_custom_small.bin
  0% 0.00/1.05M [00:00<?, ?B/s]100% 1.05M/1.05M [00:00<00:00, 102MB/s]


In [53]:
net=StudentNet().cuda()
net.load_state_dict(torch.load('student_custom_small.bin'))

<All keys matched successfully>

In [55]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.AdamW(net.parameters(),lr=1e-3)

In [61]:
def run_epoch(train_iter,update=True,alpha=0.5):
  total_num,total_hit,total_loss=0,0,0
  for X,Y in train_iter:
    X=X.to(device)
    Y=Y.to(device)
    logits=net(X)
    loss=criterion(logits,Y)
    if update:
      loss.backward()
      optimizer.step()
    total_hit+=torch.sum(torch.argmax(logits,dim=1)==Y).item()
    total_num+=Y.shape[0]
    total_loss+=loss.item()*Y.shape[0]
  return total_loss/total_num,total_hit/total_num

now_width_mult=1

for i in range(5):
  now_width_mult*=0.95
  new_net=StudentNet(width_mult=now_width_mult).cuda()
  params=net.state_dict()
  net=network_slimming(net,new_net)
  now_best_acc=0
  for epoch in range(5):
    net.train()
    train_loss,train_acc=run_epoch(train_iter,update=True)
    net.eval()
    val_loss,val_acc=run_epoch(val_iter,update=False)
    if val_acc>now_best_acc:
      now_best_acc=val_acc
      torch.save(net.state_dict(),f'custom_small_rate_{now_width_mult}.bin')
    print('rate {:6.4f} epoch {:>3d}:train loss:{:6.4f},acc {:6.4f} val loss:{:6.4f} acc {:6.4f}'.format(
        now_width_mult,epoch,train_loss,train_acc,val_loss,val_acc
    ))

rate 0.9500 epoch   0:train loss:0.4521,acc 0.8740 val loss:1.2411 acc 0.7531
rate 0.9500 epoch   1:train loss:0.4810,acc 0.8702 val loss:1.2154 acc 0.7472
rate 0.9500 epoch   2:train loss:0.4917,acc 0.8648 val loss:1.1955 acc 0.7671
rate 0.9500 epoch   3:train loss:0.5088,acc 0.8606 val loss:1.1888 acc 0.7641
rate 0.9500 epoch   4:train loss:0.5011,acc 0.8632 val loss:1.2141 acc 0.7595
rate 0.9025 epoch   0:train loss:0.5859,acc 0.8429 val loss:1.2406 acc 0.7402
rate 0.9025 epoch   1:train loss:0.5850,acc 0.8422 val loss:1.2113 acc 0.7501
rate 0.9025 epoch   2:train loss:0.6051,acc 0.8408 val loss:1.2179 acc 0.7507
rate 0.9025 epoch   3:train loss:0.5736,acc 0.8405 val loss:1.2231 acc 0.7516
rate 0.9025 epoch   4:train loss:0.5918,acc 0.8362 val loss:1.2399 acc 0.7446
rate 0.8574 epoch   0:train loss:0.6951,acc 0.8104 val loss:1.2786 acc 0.7254
rate 0.8574 epoch   1:train loss:0.7014,acc 0.8068 val loss:1.3146 acc 0.7204
rate 0.8574 epoch   2:train loss:0.6810,acc 0.8086 val loss:1.23

# 4 Weight Quantization

In [63]:
!gdown --id '12wtIa0WVRcpboQzhgRUJOpcXe23tgWUL' --output student_custom_small.bin

import os
import torch

print(f"\noriginal cost:{os.stat('student_custom_small.bin').st_size} bytes.")
params=torch.load('student_custom_small.bin')

Downloading...
From: https://drive.google.com/uc?id=12wtIa0WVRcpboQzhgRUJOpcXe23tgWUL
To: /content/student_custom_small.bin
  0% 0.00/1.05M [00:00<?, ?B/s]100% 1.05M/1.05M [00:00<00:00, 68.1MB/s]

original cost:1047430 bytes.


In [65]:
# 32-bit tensor->16-bit

import numpy as np
import pickle

def encode16(params,fname):
  custom_dict={}
  for (name,param) in params.items():
    param=np.float64(param.cpu().numpy())
    if type(param)==np.ndarray:
      custom_dict[name]=np.float16(param)
    else:
      custom_dict[name]=param
  pickle.dump(custom_dict,open(fname,'wb'))
def decode16(fname):
  params=pickle.load(open(fname,'rb'))
  custom_dict={}
  for (name,param) in params.items():
    param=torch.tensor(param)
    custom_dict[name]=param
  return custom_dict

encode16(params,'16_bit_model.pkl')
print(f"16-bit cost:{os.stat('16_bit_model.pkl').st_size} bytes.")

16-bit cost:522958 bytes.


In [68]:
# 32-bit tensor->8-bit 
def encode8(params,fname):
  custom_dict={}
  for (name,param) in params.items():
    param=np.float64(param.cpu().numpy())
    if type(param)==np.ndarray:
      min_val=np.min(param)
      max_val=np.max(param)
      param=np.round((param-min_val)/(max_val-min_val)*255)
      param=np.uint8(param)
      custom_dict[name]=(min_val,max_val,param)
    else:
      custom_dict[name]=param
  pickle.dump(custom_dict,open(fname,'wb'))

def decode8(fname):
  params=pickle.load(open(fname,'rb'))
  custom_dict={}
  for (name,param) in params.items():
    if type(param)==tuple:
      min_val,max_val,param=param
      param=np.float64(param)
      param=(param/255*(max_val-min_val))+min_val
      param=torch.tensor(param)
    else:
      param=torch.tensor(param)
    custom_dict[name]=param
  return custom_dict

encode8(params,'8_bit_model.pkl')
print(f"8-bit cost:{os.stat('8_bit_model.pkl').st_size} bytes.")

8-bit cost:268471 bytes.
