In [None]:
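# Each convolution kernel is multi-channel (not just a single 2-D matrix); sliding one kernel over the input layer yields one 2-D output matrix, so the number of kernels is the number of channels of the final output layer.
# In other words, a kernel has exactly as many channels as the input layer,
# and the number of output feature maps (output channels) equals the number of kernels.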

In [None]:
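# LeNet is the network proposed by LeCun in 1998: two convolution layers followed by three fully connected layers.
# The original network worked on grayscale images.
# Key point: in PyTorch a Tensor's dimensions are ordered [batch, channel, height, width].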

In [20]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [None]:
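# To build a model, define a class that inherits from nn.Module and implement two methods: (1) __init__, which creates the layers the network uses; (2) forward, which implements the forward pass (the backward pass is derived automatically by autograd).
# Calling an instance of the class, e.g. net(x), passes the arguments to forward, and the layers run in the order written there.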

In [2]:
class LeNet(nn.Module):
    """LeNet-style CNN mapping 3-channel 32x32 images to 10 class scores.

    Architecture: two (5x5 convolution -> ReLU -> 2x2 max-pool) stages
    followed by three fully connected layers. The output is the raw,
    unnormalised score for each of the 10 classes (no softmax is applied).

    Attribute names (conv1, conv2, fc1, fc2, fc3) are the keys of the
    state dict, so they must stay stable for saved weights to load.
    """

    def __init__(self):
        """Create the layers; nn.Module registers them as sub-modules."""
        super().__init__()
        # 3 input channels (RGB) -> 16 feature maps, 5x5 kernel, stride 1, no padding.
        self.conv1 = nn.Conv2d(3, 16, 5)
        # 2x2 pooling window with stride 2 halves height and width.
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Fully connected head; the first layer consumes the flattened
        # 32 x 5 x 5 feature volume produced by pool2.
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: float tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) with one raw score per class.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce a
                32 x 5 x 5 feature volume (raised by ``fc1``).
        """
        # Output size of a convolution: (in - kernel + 2 * padding) / stride + 1.
        x = F.relu(self.conv1(x))   # (32 - 5) / 1 + 1 = 28 -> (N, 16, 28, 28)
        x = self.pool1(x)           # (N, 16, 14, 14)
        x = F.relu(self.conv2(x))   # (14 - 5) / 1 + 1 = 10 -> (N, 32, 10, 10)
        x = self.pool2(x)           # (N, 32, 5, 5)
        # Flatten everything except the batch axis. Unlike view(-1, 32*5*5),
        # this never folds a size mismatch into the batch dimension (a wrong
        # input size now fails in fc1 instead of silently returning a different
        # number of rows) and it also accepts non-contiguous tensors.
        x = torch.flatten(x, start_dim=1)   # (N, 800)
        x = F.relu(self.fc1(x))     # (N, 120)
        x = F.relu(self.fc2(x))     # (N, 84)
        x = self.fc3(x)             # (N, 10)
        return x


In [5]:
# The model is trained on CIFAR-10: small images in 10 classes.
# Preprocessing pipeline: convert each image to a tensor, then normalise every
# one of the three channels with mean 0.5 and standard deviation 0.5.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

In [6]:
# CIFAR-10 training split: train=True selects the 50,000 training images.
# download=True fetches the archive into `root` when it is missing and reuses
# the files when they are already there, so this cell also runs on a machine
# that does not have the dataset yet.
trainset = torchvision.datasets.CIFAR10(root='F:/download_dataset', train=True, download=True, transform=transform)
# Mini-batches of 36 images, reshuffled every epoch. num_workers=0 loads data in
# the main process (the author notes that any other value raised errors on Windows).
trainloader = torch.utils.data.DataLoader(trainset, batch_size=36, shuffle=True, num_workers=0)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to F:/download_dataset\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting F:/download_dataset\cifar-10-python.tar.gz to F:/download_dataset


In [10]:
# CIFAR-10 test split: train=False selects the 10,000 test images.
testnet = torchvision.datasets.CIFAR10(root='F:/download_dataset', train=False, download=False, transform=transform)
# One batch holds the whole test set. Evaluation gains nothing from shuffling,
# so keep the dataset order to make the evaluation batch reproducible.
testloader = torch.utils.data.DataLoader(testnet, batch_size=10000, shuffle=False, num_workers=0)

In [11]:
# Pull the first batch out of the test loader once; it is reused for every
# accuracy check during training.
test_data_iter = iter(testloader)
# Use the built-in next(): the Python-2 style `.next()` method is not part of
# the iterator protocol and is not available on current DataLoader iterators.
test_image, test_label = next(test_data_iter)

In [28]:
# Human-readable class names; the tuple is indexed with the predicted label.
classes = (
    'plane',  # 0
    'car',    # 1
    'bird',   # 2
    'cat',    # 3
    'deer',   # 4
    'dog',    # 5
    'frog',   # 6
    'horse',  # 7
    'ship',   # 8
    'truck',  # 9
)

In [13]:
# Network with freshly initialised weights.
net = LeNet()

# Training objective: cross-entropy between the network's class scores and the labels.
loss_function = nn.CrossEntropyLoss()

# Adam updates every parameter of the network, learning rate 1e-3.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

In [17]:
# Train for 5 passes over the training set. After every 500 mini-batches, report
# the mean training loss of those 500 steps and the accuracy on the test batch.
log_template = '[%d, %5d] train loss:%.3f test accuracy:%.3f'
for epoch in range(5):
    running_loss = 0
    for step, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        loss = loss_function(net(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Only every 500th step (step index 499, 999, ...) triggers a report.
        if (step + 1) % 500 != 0:
            continue

        # Nothing computed inside no_grad() is tracked for gradients.
        with torch.no_grad():
            # torch.max returns (values, indices); the index is the predicted class.
            _, predict_y = torch.max(net(test_image), dim=1)
            n_correct = torch.eq(predict_y, test_label).sum().item()
            accuracy = n_correct / test_label.size(0)
            print(log_template % (epoch + 1, step + 1, running_loss / 500, accuracy))
            running_loss = 0
print("Finished training!")

[1,   500] train loss:1.292 test accuracy:0.552
[1,  1000] train loss:1.234 test accuracy:0.575
[2,   500] train loss:1.097 test accuracy:0.581
[2,  1000] train loss:1.066 test accuracy:0.621
[3,   500] train loss:0.972 test accuracy:0.639
[3,  1000] train loss:0.962 test accuracy:0.641
[4,   500] train loss:0.896 test accuracy:0.660
[4,  1000] train loss:0.886 test accuracy:0.657
[5,   500] train loss:0.819 test accuracy:0.660
[5,  1000] train loss:0.822 test accuracy:0.670
Finished training!


In [18]:
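# Optionally save the trained network parameters (uncomment the two lines below). The prediction cell further down loads this file.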
#save_path = 'F:/download_dataset/LeNet.pth'
#torch.save(net.state_dict(), save_path)

In [37]:
# Once the parameters have been saved, a new image can be classified by
# rebuilding the network, loading the parameters and preprocessing the image.
net = LeNet()
# map_location='cpu' lets the checkpoint load on a CPU-only machine even if the
# tensors in it were saved from another device.
net.load_state_dict(torch.load('F:/download_dataset/LeNet.pth', map_location='cpu'))
# Inference should run in evaluation mode.
net.eval()
# Open the file in a context manager so the handle is closed; convert() returns
# a fully loaded RGB copy that stays valid after the file is closed.
with Image.open('F:/download_dataset/ppww.png') as image_file:
    im = image_file.convert('RGB')
# Same normalisation as for training, preceded by a resize to the 32x32 input
# size the network expects.
transform = transforms.Compose(
    [transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])
im = transform(im)
# The network expects a batch dimension, so add one in front: (C, H, W) -> (1, C, H, W).
im = torch.unsqueeze(im, dim=0)

In [38]:
# Classify the prepared image without tracking gradients.
with torch.no_grad():
    outputs = net(im)
    # torch.max(...)[1] holds the index of the highest score per image; with a
    # batch of one image this is a NumPy array of shape (1,).
    predict = torch.max(outputs, dim=1)[1].numpy()
# Index element 0 explicitly: converting a 1-element array straight to int is
# deprecated in recent NumPy releases.
print(classes[int(predict[0])])

dog
