# 本文件的目标是：1.测试代码、模型有没有bug 2.看预训练vgg的特征提取是否有效
## 具体操作方法：
### 模型：在预训练好的vgg16上额外加了一（两）层全连接层，使其输出能分200类
### 数据集：原数据集太大了，对于一个仅用于测试评估的模型而言训练起来太慢，我（写了一个代码来完成这件事）抽出原数据集中1000张图片尝试模型能否拟合这个小数据；在这个小数据集上准确率78%上下
### 我的设备：2060s显卡（8G显存），32G运行内存

In [1]:
import os
from torch import nn
from torchvision.models import VGG16_Weights
import torchvision.models as models
import torch
import ssl
ssl._create_default_https_context = ssl._create_unverified_context # 这行代码为了  下载预训练权重时不出网络问题

cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")
print('Using {} device'.format(device))

Using cuda:0 device


In [2]:
seed = 1008
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [3]:
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

smallBatchPath="."+os.sep+"smallBatch"
class mySmallBatchDataset(Dataset):
    def __init__(self):
        self.labelList=[]
        self.imgList=[]
        imgList=os.listdir(smallBatchPath)
        trans=transforms.ToTensor()
        for img in imgList:
            op=Image.open(smallBatchPath+os.sep+img).convert('RGB')  # 貌似这是jpg的图所以没有透明通道，所以.convert('RGB')与否不重要
            op=op.resize((224,224))
            op=trans(op)
            labelNum=int(img.split("(")[0])-1
            self.labelList.append(labelNum)
            self.imgList.append(op)
            # print(op.size())  # torch.Size([3, 500, 500])
        print(len(self.labelList))
        print(len(self.imgList))


    def __getitem__(self, index):
        return self.labelList[index],self.imgList[index]

    def __len__(self):
        return len(self.labelList)

In [4]:
smallBatchDataSet=mySmallBatchDataset()
smallBatchDataLoader=DataLoader(smallBatchDataSet,batch_size=100,shuffle=True)

2000
2000


In [7]:
import torch.nn.functional as F
class vgg16FineTuneModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model=models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1).eval()
        # self.finalFC1=nn.Linear(in_features=1000, out_features=1000)
        self.finalFC2=nn.Linear(in_features=1000, out_features=200)

    def forward(self, x):
        with torch.no_grad():
            self.model.eval()
            x=self.model(x)
        # x=self.finalFC1(x)
        # x=F.relu(x)
        x=self.finalFC2(x)
        return x

In [8]:
md=vgg16FineTuneModel()

In [9]:
from torch import optim

def train(model,train_loader,epoch,learning_rate=0.1):
    optimizer = optim.SGD(model.parameters(), lr=learning_rate,momentum=0.9)
    lossFunction=nn.CrossEntropyLoss()
    model.to(device)
    for i in range(epoch):
        wrongNum=0
        rightNum=0
        print("epoch",i+1,"/",epoch)
        for batch_idx,(label,data) in enumerate(train_loader):
            data=data.to(device)
            label=label.to(device)
            optimizer.zero_grad()

            output=model.forward(data)

            loss=lossFunction(output,label)
            loss.backward()

            pre=output.argmax(dim=1)
            eqq=torch.eq(pre,label)
            summ=int(eqq.sum())
            rightNum+=summ
            wrongNum+=(len(eqq)-summ)

            if batch_idx%5==0:
                print("loss",loss)
                print("rightNum",rightNum,"wrongNum",wrongNum)
            optimizer.step()
        torch.save(model, 'vgg16finetuneSmallTrainSave.pth')
        print("TOTALrightNum",rightNum,"TOTALwrongNum",wrongNum)
        print("save")

In [10]:
# torch.cuda.empty_cache()
train(md,smallBatchDataLoader,epoch=5,learning_rate=0.3)

epoch 1 / 5
loss tensor(6.4105, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 0 wrongNum 100
loss tensor(130.9742, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 8 wrongNum 592
loss tensor(439.5861, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 26 wrongNum 1074
loss tensor(377.7731, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 51 wrongNum 1549
TOTALrightNum 76 TOTALwrongNum 1924
save
epoch 2 / 5
loss tensor(489.6523, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 16 wrongNum 84
loss tensor(514.6033, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 101 wrongNum 499
loss tensor(395.2779, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 183 wrongNum 917
loss tensor(664.1677, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 258 wrongNum 1342
TOTALrightNum 319 TOTALwrongNum 1681
save
epoch 3 / 5
loss tensor(471.7179, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 32 wrongNum 68
loss tensor(384.4083, device='cuda:0', grad_fn=<NllLo

In [11]:
# torch.cuda.empty_cache()
train(md,smallBatchDataLoader,epoch=10,learning_rate=0.1)

epoch 1 / 10
loss tensor(87.6870, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 34 wrongNum 66
loss tensor(41.9816, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 261 wrongNum 339
loss tensor(64.1470, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 539 wrongNum 561
loss tensor(101.6649, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 803 wrongNum 797
TOTALrightNum 1038 TOTALwrongNum 962
save
epoch 2 / 10
loss tensor(35.6325, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 55 wrongNum 45
loss tensor(74.1197, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 367 wrongNum 233
loss tensor(16.5538, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 688 wrongNum 412
loss tensor(79.5310, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 985 wrongNum 615
TOTALrightNum 1236 TOTALwrongNum 764
save
epoch 3 / 10
loss tensor(71.4622, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 74 wrongNum 26
loss tensor(23.8586, device='cuda:0', grad_fn=<NllLoss

In [12]:
# torch.cuda.empty_cache()
train(md,smallBatchDataLoader,epoch=10,learning_rate=0.05)

epoch 1 / 10
loss tensor(3.9938, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 88 wrongNum 12
loss tensor(2.9943, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 520 wrongNum 80
loss tensor(4.1496, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 963 wrongNum 137
loss tensor(2.7919, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1418 wrongNum 182
TOTALrightNum 1775 TOTALwrongNum 225
save
epoch 2 / 10
loss tensor(5.5983, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 94 wrongNum 6
loss tensor(1.0939, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 543 wrongNum 57
loss tensor(1.7684, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 998 wrongNum 102
loss tensor(1.4070, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1472 wrongNum 128
TOTALrightNum 1838 TOTALwrongNum 162
save
epoch 3 / 10
loss tensor(1.4778, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 91 wrongNum 9
loss tensor(2.1655, device='cuda:0', grad_fn=<NllLossBackward0>)
r

In [13]:
# torch.cuda.empty_cache()
train(md,smallBatchDataLoader,epoch=10,learning_rate=0.01)

epoch 1 / 10
loss tensor(0.1751, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 99 wrongNum 1
loss tensor(0.5041, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 590 wrongNum 10
loss tensor(0.3679, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1076 wrongNum 24
loss tensor(0.0492, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1569 wrongNum 31
TOTALrightNum 1964 TOTALwrongNum 36
save
epoch 2 / 10
loss tensor(0.0001, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 100 wrongNum 0
loss tensor(0.0491, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 596 wrongNum 4
loss tensor(0.0122, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1094 wrongNum 6
loss tensor(0.0004, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1591 wrongNum 9
TOTALrightNum 1987 TOTALwrongNum 13
save
epoch 3 / 10
loss tensor(8.4485e-05, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 100 wrongNum 0
loss tensor(0.0002, device='cuda:0', grad_fn=<NllLossBackward0>)
rig

In [14]:
# torch.cuda.empty_cache()
train(md,smallBatchDataLoader,epoch=10,learning_rate=0.001)

epoch 1 / 10
loss tensor(4.9719, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 98 wrongNum 2
loss tensor(0.0004, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 595 wrongNum 5
loss tensor(8.3565e-07, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1094 wrongNum 6
loss tensor(2.8242e-05, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1591 wrongNum 9
TOTALrightNum 1991 TOTALwrongNum 9
save
epoch 2 / 10
loss tensor(0.0005, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 100 wrongNum 0
loss tensor(4.6249e-05, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 598 wrongNum 2
loss tensor(3.7838, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1095 wrongNum 5
loss tensor(0.0091, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 1593 wrongNum 7
TOTALrightNum 1991 TOTALwrongNum 9
save
epoch 3 / 10
loss tensor(2.9443, device='cuda:0', grad_fn=<NllLossBackward0>)
rightNum 99 wrongNum 1
loss tensor(1.3185e-05, device='cuda:0', grad_fn=<NllLossBackward0

In [15]:
torch.save(md, 'vgg16finetuneSmall.pth')

# 结论：代码可以拟合小训练集，没有致命问题，可以考虑在整个训练集上运行