In [35]:
import torch
import torchvision
import torch.nn.functional as F
from torch.nn import Conv2d, MaxPool2d
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import ReLU
from torch.nn import Linear
from torch.utils.tensorboard import SummaryWriter

In [36]:
class Wisdom(nn.Module):
    def __init__(self):
        super(Wisdom,self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6,kernel_size=3,stride=1,padding=0)
        # 3 为输入通道数，6 为输出通道数,表示6个卷积核进行处理得到六个输出，3 为卷积核大小
        self.maxpool1 = MaxPool2d(kernel_size=3,ceil_mode=False)
        self.relu1 =ReLU() #默认 inplace=False，即不改变原数据
        self.linear1 = Linear(196608,10)

    def forward(self, x):
        x=self.conv1(x)
        return x
    def maxpool(self,x):
        output = self.maxpool1(x)
        return output
    def relu(self,x):
        output = self.relu1(x)
        return output
    def linear(self,x):
        output = self.linear1(x)
        return output

学习module的使用，定义一个类

In [5]:
#input 为输入图像
input = torch.tensor([[1,2,0,3,1],
                      [0,1,2,3,1],
                      [1,2,1,0,0],
                      [5,2,3,1,1],
                      [2,1,0,1,1]])

# kernel 为卷积核，
kernel = torch.tensor([[1,2,1],
                       [0,1,0],
                       [2,1,0]])

input = torch.reshape(input,(1,1,5,5))  # 1 为batch_size, 1 为通道数
# batch_size指一次训练迭代中，模型处理的样本数量，通道数表示特征图的数量

kernel = torch.reshape(kernel,(1,1,3,3))  

output = F.conv2d(input, kernel, stride=1) # stride为步长,表示卷积核每次移动的距离
print(output)

output2 = F.conv2d(input, kernel, stride=2) # 卷积核每次移动的距离为2
print(output2)

output3 = F.conv2d(input, kernel, stride=1, padding=1) # padding为填充, 1表示在图像周围填充一圈0,再进行卷积操作
print(output3)

tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
tensor([[[[10, 12],
          [13,  3]]]])
tensor([[[[ 1,  3,  4, 10,  8],
          [ 5, 10, 12, 12,  6],
          [ 7, 18, 16, 16,  8],
          [11, 13,  9,  3,  4],
          [14, 13,  9,  7,  4]]]])



### 学习卷积操作
$f(t)g(x-t)dt$ $f(t)$是变化函数，给定的输入，$g(x-t)$是卷积核，代表对周围参数的影响，卷积核在输入上滑动，计算卷积核与输入的乘积

#### 卷积核作用：
<ol>
<li>对不稳定输入进行稳定输出，例如$f(x)$表示摄入食品与时间的关系，$g(x)$表示食品消化的量与时间的关系，对两个相乘函数求积分，代表胃容物总量与时间关系</li>
<li>图像经过卷积核处理，表示周围像素点如何对当前像素产生影响，从而进行平滑处理</li>
<li>看作对周围像素点的试探，从而得到更好的特征提取</li>
</ol>

In [26]:
dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)

writer = SummaryWriter("logs")
step=0
wisdom = Wisdom()
for data in dataloader:
    imgs, targets = data
    output = wisdom(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input",imgs,step)
    # torch.Size([64, 6, 30, 30]) -> [xxx,3,30,30] 因为彩色图像有3个通道，6个通道会报错，所以需要reshape
    # 多余部分进入batch_size
    output = torch.reshape(output,(-1,3,30,30))
    writer.add_images("output",output,step)
    step = step + 1


Files already downloaded and verified
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size

In [28]:
# maxpool 用法
input = torch.tensor([[1,2,0,3,1],
                      [0,1,2,3,1],
                      [1,2,1,0,0],
                      [5,2,3,1,1],
                      [2,1,0,1,1]])
input = torch.reshape(input,(-1,1,5,5))
print (input.shape)
output = wisdom.maxpool(input)
print(output)

writer = SummaryWriter("logs_maxpool")
step=0
for data in dataloader:
    imgs, targets = data
    output = wisdom.maxpool(imgs)
    writer.add_images("input",imgs,step)
    writer.add_images("output",output,step)
    step = step + 1

torch.Size([1, 1, 5, 5])
tensor([[[[2]]]])


### 最大池化操作
池化核在输入上滑动，每次取池化核覆盖区域的最大值，得到输出

默认stride=kernel_size,即池化核每次移动的距离为池化核大小

若最后一次滑动不足以覆盖整个区域，则依照Ceil_model,若为true，保留最后一次滑动的最大值；若为false，舍弃最后一次滑动的最大值

作用：用于减少数据维度和参数量，提高训练速度

In [32]:
# relu 用法
input = torch.tensor([[1,-0.5],
                      [-1,3]])
input = torch.reshape(input,(-1,1,2,2))
wisdom = Wisdom()
output = wisdom.relu(input)
print(output)

tensor([[[[1., 0.],
          [0., 3.]]]])


In [38]:
# linear 操作
dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)
wisdom= Wisdom()
for data in dataloader:
    imgs, targets = data
    # output = torch.reshape(imgs,(1,1,1,-1))
    output = torch.flatten(imgs) #展平成一行
    output = torch.flatten(imgs)
    output = wisdom.linear1(output)
    print (output.shape)

Files already downloaded and verified
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x49152 and 196608x10)

### linear 线性层操作
$y = xA^T + b$，其中x为输入，A为权重，b为偏置
对一个尺寸5*5的输入，先 reshape 成 1*25，只有一层然后与权重相乘，经过线性层，得到输出