<a href="https://colab.research.google.com/github/yanting04/getting-started/blob/master/DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [9]:
import torch
import torchvision
import torchvision.transforms as transforms
import time
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
import torch.nn.functional as F
import torch.nn as nn
import torchvision.models as models
from torch.utils.data.sampler import SubsetRandomSampler
device =torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=512,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=512,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [10]:
class Vgg16_net(nn.Module):
    def __init__(self):
        super(Vgg16_net, self).__init__()


        self.layer1=nn.Sequential(
            nn.Conv2d(in_channels=3,out_channels=64,kernel_size=3,stride=1,padding=1), #(32-3+2)/1+1=32   32*32*64
            nn.BatchNorm2d(64),
            
            nn.ReLU(inplace=True),
            

            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,stride=1,padding=1), #(32-3+2)/1+1=32    32*32*64
            
            # On the one hand, the data distribution is made consistent, and on the other hand, the gradient disappears.
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(kernel_size=2,stride=2)   #(32-2)/2+1=16         16*16*64
        )


        self.layer2=nn.Sequential(
            nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1),  #(16-3+2)/1+1=16  16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128,out_channels=128,kernel_size=3,stride=1,padding=1), #(16-3+2)/1+1=16   16*16*128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2,2)    #(16-2)/2+1=8     8*8*128
        )

        self.layer3=nn.Sequential(
            nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,stride=1,padding=1),  #(8-3+2)/1+1=8   8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),


            nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,stride=1,padding=1),  #(8-3+2)/1+1=8   8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,stride=1,padding=1),  #(8-3+2)/1+1=8   8*8*256
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2,2)     #(8-2)/2+1=4      4*4*256
        )

        self.layer4=nn.Sequential(
            nn.Conv2d(in_channels=256,out_channels=512,kernel_size=3,stride=1,padding=1),  #(4-3+2)/1+1=4    4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,stride=1,padding=1),   #(4-3+2)/1+1=4    4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,stride=1,padding=1),   #(4-3+2)/1+1=4    4*4*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2,2)    #(4-2)/2+1=2     2*2*512
        )

        self.layer5=nn.Sequential(
            nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,stride=1,padding=1),   #(2-3+2)/1+1=2    2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,stride=1,padding=1),  #(2-3+2)/1+1=2     2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=512,out_channels=512,kernel_size=3,stride=1,padding=1),  #(2-3+2)/1+1=2      2*2*512
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2,2)   #(2-2)/2+1=1      1*1*512
        )


        self.conv=nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )

        self.fc=nn.Sequential(
            
            nn.Linear(512,512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(512,256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(256,10)
        )


    def forward(self,x):
        x=self.conv(x)
        
        x = x.view(-1, 512)
        x=self.fc(x)
        return x


    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                m.bias.data.zero_()
                

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1=nn.Sequential(   
            nn.Conv2d(   
                in_channels=3,   
                out_channels=16,   
                kernel_size=5,   
                stride=1,   
                padding=2,   #padding=(kernel_size-1)/2
            ),   #(3,32,32)-->(16,32,32)
            nn.ReLU(),   
            nn.MaxPool2d(kernel_size=2),   #  (16,32,32)-->(16,16,16)
        )
        self.conv2=nn.Sequential(   
            nn.Conv2d(16,32,5,1,2),  # (16,16,16) -->(32,16,16)
            nn.ReLU(),
            nn.MaxPool2d(2),   # (32,16,16)-->(32,8,8)
        )
        self.conv3=nn.Sequential(
            nn.Conv2d(32,64,5,1,2),   #(32,8,8)-->(64,8,8)
            nn.ReLU(),
            nn.MaxPool2d(2),   #(64,8,8)-->(64,4,4)
        )
        self.out=nn.Linear(64*4*4,10)   # （1，10）
    def forward(self,x):
        x=self.conv1(x)
        x=self.conv2(x)
        x=self.conv3(x)
        x=x.view(-1,64*4*4,)   #
        output=self.out(x)
        return output
   
    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                m.bias.data.zero_()

In [12]:
device

device(type='cuda', index=0)

In [13]:
teacher_model = Vgg16_net().to(device)
student_model = Net().to(device)
teacher_model = torch.load("/content/gdrive/MyDrive/teacher.pth").to(device)#加载权重文件

In [None]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss().to(device)
criterion2 = nn.KLDivLoss().to(device)
optimizer = optim.Adam(student_model.parameters(),lr = 0.001)
for epoch in range(100):
    loss_sigma = 0.0
    correct = 0.0
    total = 0.0
    T=2
    alpha = 0.5
    for i, data in enumerate(trainloader,0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = student_model(inputs.float().to(device))
        loss1 = criterion(outputs.to(device), labels.to(device))
        teacher_outputs = teacher_model(inputs.float().to(device))
        outputs_S = F.softmax(outputs/T,dim=1)
        outputs_T = F.softmax(teacher_outputs/T,dim=1)
        loss2 = criterion2(outputs_S.to(device),outputs_T.to(device))*T*T
        loss = loss1*(1-alpha) + loss2*alpha
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.data, dim = 1)
        total += labels.size(0)
        correct += (predicted.cpu()==labels.cpu()).squeeze().sum().numpy()
        loss_sigma += loss.item()
    print('Epohc :{}  Loss:{:.2}   Acc:{:.2%}'.format(epoch ,loss_sigma, correct/total))
    loss_sigma = 0.0

  "reduction: 'mean' divides the total loss by both the batch size and the support size."


Epohc :0  Loss:7.9e+01   Acc:35.90%
Epohc :1  Loss:6.2e+01   Acc:48.30%
Epohc :2  Loss:5.4e+01   Acc:53.93%
Epohc :3  Loss:4.8e+01   Acc:58.11%
Epohc :4  Loss:4.3e+01   Acc:61.54%
Epohc :5  Loss:3.9e+01   Acc:63.85%
Epohc :6  Loss:3.6e+01   Acc:66.18%
Epohc :7  Loss:3.3e+01   Acc:67.80%
Epohc :8  Loss:3.1e+01   Acc:69.18%
Epohc :9  Loss:2.9e+01   Acc:70.85%
Epohc :10  Loss:2.7e+01   Acc:71.90%
Epohc :11  Loss:2.5e+01   Acc:73.04%
Epohc :12  Loss:2.3e+01   Acc:74.08%
Epohc :13  Loss:2.2e+01   Acc:74.72%
Epohc :14  Loss:2e+01   Acc:75.97%
Epohc :15  Loss:1.9e+01   Acc:77.13%
Epohc :16  Loss:1.7e+01   Acc:77.96%


In [6]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = student_model(images.float().to(device))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu()==labels.cpu()).squeeze().sum().numpy()
print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 10 %


In [7]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = student_model(images.float().to(device))
        _, predicted = torch.max(outputs, 1)
        c = (predicted.cpu()==labels.cpu()).squeeze()
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane :  0 %
Accuracy of   car :  0 %
Accuracy of  bird :  0 %
Accuracy of   cat :  0 %
Accuracy of  deer :  0 %
Accuracy of   dog :  0 %
Accuracy of  frog :  0 %
Accuracy of horse :  0 %
Accuracy of  ship : 26 %
Accuracy of truck : 81 %
