In [30]:
import torch
import torchvision
import torch.nn.functional as F
from torch.nn import Conv2d, MaxPool2d, Flatten, L1Loss, MSELoss
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import ReLU
from torch.nn import Linear
from torch.utils.tensorboard import SummaryWriter

In [None]:
class Wisdom(nn.Module):
    """Toy module holding one layer of each kind demonstrated in this notebook.

    ``forward`` applies only the convolution. The pooling, activation and
    linear layers are reachable through their own helper methods so that each
    one can be tried out in isolation.
    """

    def __init__(self):
        super().__init__()
        # 3 input channels -> 6 output channels (six kernels), each kernel 3x3,
        # moved one pixel at a time with no zero padding.
        self.conv1 = Conv2d(
            in_channels=3,
            out_channels=6,
            kernel_size=3,
            stride=1,
            padding=0,
        )
        # 3x3 pooling window; ceil_mode=False discards windows that do not fit.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)
        # inplace defaults to False, so the input tensor is not modified.
        self.relu1 = ReLU()
        # Maps 196608 input features to 10 outputs.
        self.linear1 = Linear(196608, 10)

    def forward(self, x):
        """Return ``x`` after the convolution layer."""
        return self.conv1(x)

    def maxpool(self, x):
        """Return ``x`` after the max-pooling layer."""
        return self.maxpool1(x)

    def relu(self, x):
        """Return ``x`` after the ReLU activation."""
        return self.relu1(x)

    def linear(self, x):
        """Return ``x`` after the linear layer."""
        return self.linear1(x)

学习module的使用，定义一个类

In [None]:
# Input "image": a 5x5 grid reshaped to (batch_size=1, channels=1, 5, 5).
# batch_size is the number of samples processed per iteration; the channel
# count is the number of feature maps.
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1],
]).reshape(1, 1, 5, 5)

# Convolution kernel: a 3x3 grid reshaped to (1, 1, 3, 3).
kernel = torch.tensor([
    [1, 2, 1],
    [0, 1, 0],
    [2, 1, 0],
]).reshape(1, 1, 3, 3)

# stride is the distance the kernel moves between applications.
output = F.conv2d(input, kernel, stride=1)
print(output)

# Same convolution, but the kernel moves two positions at a time.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# padding=1 surrounds the image with one ring of zeros before convolving.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)


### 学习卷积操作
$(f \ast g)(x) = \int f(t)\,g(x-t)\,dt$，其中 $f(t)$ 是变化函数，即给定的输入；$g(x-t)$ 是卷积核，代表对周围参数的影响。卷积核在输入上滑动，计算卷积核与输入的乘积并求和（积分）

#### 卷积核作用：
<ol>
<li>对不稳定输入进行稳定输出，例如$f(x)$表示摄入食品与时间的关系，$g(x)$表示食品消化的量与时间的关系，对两个相乘函数求积分，代表胃容物总量与时间关系</li>
<li>图像经过卷积核处理，表示周围像素点如何对当前像素产生影响，从而进行平滑处理</li>
<li>看作对周围像素点的试探，从而得到更好的特征提取</li>
</ol>

In [None]:
# Load the CIFAR-10 test split as tensors and iterate over it in batches of 64.
dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)

wisdom = Wisdom()
writer = SummaryWriter("logs")
step = 0
try:
    for data in dataloader:
        imgs, targets = data
        output = wisdom(imgs)
        print(imgs.shape)
        print(output.shape)
        # A full input batch has shape torch.Size([64, 3, 32, 32]).
        writer.add_images("input", imgs, step)
        # The convolution output is [64, 6, 30, 30], but add_images expects
        # 3-channel (colour) images and fails on 6 channels. Reshape to
        # [-1, 3, 30, 30] so the surplus channels spill into the batch dimension.
        output = torch.reshape(output, (-1, 3, 30, 30))
        writer.add_images("output", output, step)
        step = step + 1
finally:
    # Close the writer so buffered events are flushed to disk even if the
    # loop stops early.
    writer.close()


In [None]:
# Max-pooling usage.
# The demo input is created as float32: max pooling over integer (Long)
# tensors is not implemented in some PyTorch versions and raises a
# RuntimeError there.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)
output = wisdom.maxpool(input)
print(output)

# Log each image batch and its pooled version to TensorBoard.
writer = SummaryWriter("logs_maxpool")
step = 0
try:
    for data in dataloader:
        imgs, targets = data
        output = wisdom.maxpool(imgs)
        writer.add_images("input", imgs, step)
        writer.add_images("output", output, step)
        step = step + 1
finally:
    # Close the writer so buffered events are flushed to disk even if the
    # loop stops early.
    writer.close()

### 最大池化操作
池化核在输入上滑动，每次取池化核覆盖区域的最大值，得到输出

默认stride=kernel_size,即池化核每次移动的距离为池化核大小

若最后一次滑动时池化核超出输入边界（无法覆盖完整窗口），则由 `ceil_mode` 决定：若为 True，保留这个不完整窗口内的最大值；若为 False，舍弃该窗口

作用：用于减少数据维度和参数量，提高训练速度

In [None]:
# ReLU usage: a 2x2 input with negative entries, reshaped to (-1, 1, 2, 2),
# is passed through the module's ReLU layer and printed.
input = torch.tensor([
    [1, -0.5],
    [-1, 3],
]).reshape(-1, 1, 2, 2)

wisdom = Wisdom()
output = wisdom.relu(input)
print(output)

In [None]:
# Linear-layer usage.
dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
# drop_last=True skips the final, incomplete batch. The CIFAR-10 test split
# has 10,000 images, so with batch_size=64 the last batch holds only 16
# images; flattened, that is 49,152 values, which Linear(196608, 10) rejects.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)
wisdom = Wisdom()
for data in dataloader:
    imgs, targets = data
    # Flatten the whole batch into one row of 64*3*32*32 = 196608 values.
    # (torch.reshape(imgs, (1, 1, 1, -1)) is an alternative that keeps 4 dims.)
    output = torch.flatten(imgs)
    output = wisdom.linear1(output)
    print(output.shape)

### linear 线性层操作
$y = xA^T + b$，其中x为输入，A为权重，b为偏置

对一个尺寸 $5\times5$ 的输入，先 reshape 成 $1\times25$，只有一层，然后与权重相乘，经过线性层，得到输出

此处作用：用 `torch.flatten` 将 [64,3,32,32] 的输入展平成长度为 196608 的一维张量（若改用 `torch.reshape(imgs,(1,1,1,-1))`，形状则为 [1,1,1,196608]），从而把特征拉平，进行线性输入

然后与权重相乘，得到输出

**torch.nn.Linear(in_features, out_features, bias=True)**

bias为偏置，默认为True,作用是在输出上加上一个常数

weight为权重，in_features为输入特征数，out_features为输出特征数

In [None]:
# Using CIFAR-10 as the example data for the convolutional network below.
# Images are converted to tensors and served one sample per batch.
# train=False selects the test split.
dataset = torchvision.datasets.CIFAR10(
    root='./dataset',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
dataloader = DataLoader(dataset=dataset, batch_size=1)

class Cifar10(nn.Module):
    """Small CNN mapping 3-channel 32x32 images to 10 output scores.

    The layer sequence follows the architecture diagram kept alongside this
    notebook. For a 32x32 input, each 5x5 convolution with padding=2 keeps
    the spatial size and each 2x2 max-pool halves it (32 -> 16 -> 8 -> 4), so
    the flattened feature vector has 64*4*4 entries before the linear layers.
    """

    def __init__(self):
        super().__init__()
        # nn.Sequential chains the layers so that forward() is a single call.
        # Storing every layer as its own attribute (conv1, maxpool1, conv2,
        # ...) and calling them one after another in forward() is equivalent.
        layers = [
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(64 * 4 * 4, 64),
            Linear(64, 10),
        ]
        self.model1 = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        return self.model1(x)

wisdom = Cifar10()
# print(wisdom)

# Sanity check for the architecture (left disabled): push a random batch
# through the network; an error here means the layer sizes do not line up.
# input = torch.randn(64,3,32,32)
# output = wisdom(input)
# print(output.shape)

# Optional: write the network structure to TensorBoard; the graph view is a
# convenient way to inspect the architecture.
# writer = SummaryWriter("../logs_cifar10")
# writer.add_graph(wisdom,input)
# writer.close()

# CrossEntropyLoss is used for multi-class classification:
#   loss(x, class) = -log(exp(x[class]) / sum_j exp(x[j]))
loss = nn.CrossEntropyLoss()

for data in dataloader:
    imgs, targets = data
    output = wisdom(imgs)
    result_loss = loss(output, targets)
    # Clear gradients left over from the previous sample. Without this,
    # backward() keeps adding into .grad on every iteration, so the stored
    # gradients would be a running sum over all samples seen so far.
    wisdom.zero_grad()
    # Only gradients are computed here; no optimizer step is taken, so the
    # weights stay unchanged.
    result_loss.backward()
    print(result_loss)



Files already downloaded and verified


#### 以 $cifar10$ 数据集为例，展示卷积神经网络的训练过程 具体结构见文件夹内的图片
$ H_{out} = \left\lfloor \frac{H_{in} + 2 \times padding[0] - dilation[0] \times (kernel\_size[0]-1) - 1}{stride[0]} + 1 \right\rfloor $

根据这个公式计算具体参数，dilation为膨胀系数，默认为1，kernel_size为卷积核大小，stride为步长，padding为填充

$H_{in}$为输入特征图大小，$H_{out}$为输出特征图大小,二者已知，都为32,最后得到padding=2, stride=1

同理，经过计算得到后面步骤的参数


In [31]:
# Loss-function demo: compare predictions [1, 2, 3] with targets [1, 2, 5],
# both reshaped to (1, 1, 1, 3).
inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
targets = torch.tensor([1, 2, 5], dtype=torch.float32).reshape(1, 1, 1, 3)

# L1Loss measures the mean absolute error; MSELoss measures the mean
# squared error.
loss = L1Loss(reduction='mean')
loss_mse = MSELoss(reduction='mean')

result = loss(inputs, targets)
result_mse = loss_mse(inputs, targets)

print(result)
print(result_mse)

tensor(0.6667)
tensor(1.3333)


### loss function
<ul>
<li> 计算实际输出和目标之间的差距 </li>
<li> 为我们更新输出提供一定的依据（反向传播） </li>
</ul>