In [46]:
%%javascript
$.getScript("http://120.78.95.32/j_n_contents.js")

<IPython.core.display.Javascript object>

<div id="toc">
</div>

# 5.1 卷积

In [27]:
import torch
from torch import nn
def corr2d(x,k):
    h,w=k.shape
    y=torch.zeros((x.shape[0]-h+1,x.shape[1]-w+1))
    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            y[i,j]=(x[i:i+h,j:j+w]*k).sum()
    return y

In [28]:
x=torch.tensor([[0,1,2],[3,4,5],[6,7,8]])
k=torch.tensor([[0,1],[2,3]])
corr2d(x,k)

tensor([[19., 25.],
        [37., 43.]])

In [41]:
class conv2d(nn.Module):
    def __init__(self,kernel_size):
        super(conv2d,self).__init__()
        self.weight=nn.Parameter(torch.randn(kernel_size))
        self.bias=nn.Parameter(torch.randn(1))
    def forward(self,x):
        return corr2d(x,self.weight)+self.bias

In [42]:
x=torch.ones(6,8)
x[:,2:6]=0
x

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [43]:
k=torch.tensor([[1,-1]],dtype=torch.float)

In [44]:
y=corr2d(x,k)
y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [46]:
conv=conv2d(kernel_size=(1,2))
step=20
lr=0.01
for i in range(step):
    y_hat=conv(x)
    l=((y_hat-y)**2).sum()
    l.backward()
    
    conv.weight.data-=lr*conv.weight.grad
    conv.bias.data-=lr*conv.bias.grad
    conv.weight.grad.fill_(0)
    conv.bias.grad.fill_(0)
    if (i+1)%5==0:
        print("step {},loss {}".format(i+1,l.item()))

step 5,loss 6.467115879058838
step 10,loss 1.7793506383895874
step 15,loss 0.4931490123271942
step 20,loss 0.13707450032234192


In [47]:
print(conv.weight.data)
print(conv.bias.data)

tensor([[ 0.9079, -0.9041]])
tensor([-0.0021])


# 5.2 填充和步幅

In [49]:
import torch
from torch import nn
def comp_conv2d(conv2d,x):
    x=x.view((1,1)+x.shape)
    y=conv2d(x)
    return y.view(y.shape[2:])

conv2d=nn.Conv2d(in_channels=1,out_channels=1,kernel_size=3,padding=1)
x=torch.rand(8,8)
comp_conv2d(conv2d,x).shape

torch.Size([8, 8])

In [51]:
conv2d=nn.Conv2d(in_channels=1,out_channels=1,kernel_size=(5,3),padding=(2,1))
comp_conv2d(conv2d,x).shape

torch.Size([8, 8])

In [52]:
conv2d=nn.Conv2d(1,1,kernel_size=3,padding=1,stride=2)
comp_conv2d(conv2d,x).shape

torch.Size([4, 4])

# 5.3 多输入输出

In [60]:
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

In [61]:
def corr2d_multi_in(x,k):
    res=d2l.corr2d(x[0,:,:],k[0,:,:])
    for i in range(1,x.shape[0]):
        res+=d2l.corr2d(x[i,:,:],k[i,:,:])
    return res

In [72]:
x=torch.tensor([[[0,1,2],[3,4,5],[6,7,8]],[[1,2,3],[4,5,6],[7,8,9]]])
k=torch.tensor([[[0,1],[2,3]],[[1,2],[3,4]]])

In [73]:
x.shape,k.shape

(torch.Size([2, 3, 3]), torch.Size([2, 2, 2]))

In [74]:
corr2d_multi_in(x,k)

tensor([[ 56.,  72.],
        [104., 120.]])

In [75]:
def corr2d_multi_in_out(x,k):
    return torch.stack([corr2d_multi_in(x,a) for a in k])

In [76]:
k=torch.stack([k,k+1,k+2])
k.shape

torch.Size([3, 2, 2, 2])

In [77]:
corr2d_multi_in_out(x,k)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

In [78]:
def corr2d_multi_in_out_11(x,k):
    c_i,h,w=x.shape
    c_o=k.shape[0]
    x=x.view(c_i,h*w)
    k=k.view(c_o,c_i)
    y=torch.mm(k,x)
    return y.view(c_o,h,w)

In [79]:
x=torch.rand(3,3,3)
k=torch.rand(2,3,1,1)
y1=corr2d_multi_in_out_11(x,k)
y2=corr2d_multi_in_out(x,k)
(y1-y2).norm().item()

0.0

# 5.4 池化层

In [103]:
a=nn.MaxPool2d(kernel_size=2,stride=1)

In [104]:
x=torch.tensor([[[0,1,2],[3,4,5],[6,7,8]]],dtype=torch.float)

In [105]:
x.shape

torch.Size([1, 3, 3])

In [106]:
a(x)

tensor([[[4., 5.],
         [7., 8.]]])

In [107]:
b=nn.AvgPool2d(kernel_size=2,stride=1)

In [108]:
b(x)

tensor([[[2., 3.],
         [5., 6.]]])

In [109]:
c=nn.MaxPool2d((2,4),padding=(1,2),stride=(2,3))
c(x)

tensor([[[1., 2.],
         [7., 8.]]])

# 5.5 LeNet

In [112]:
import time
import torch
from torch import nn,optim

import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")

In [113]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet,self).__init__()
        self.conv=nn.Sequential(
            nn.Conv2d(1,6,5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6,16,5),
            nn.Sigmoid(),
            nn.MaxPool2d(2,2)
        )
        self.fc=nn.Sequential(
            nn.Linear(16*4*4,120),
            nn.Sigmoid(),
            nn.Linear(120,84),
            nn.Sigmoid(),
            nn.Linear(84,10)
        )
    def forward(self,img):
        a=self.conv(img)
        output=self.fc(a.view(img.shape[0],-1))
        return output

In [114]:
net=LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [115]:
batch_size=256
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size=batch_size)

In [116]:
lr,num_epochs=0.001,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)

d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 1.9338, train acc 0.282, test acc 0.575, time 2.6 sec
epoch 2, loss 0.4817, train acc 0.632, test acc 0.675, time 2.5 sec
epoch 3, loss 0.2603, train acc 0.713, test acc 0.724, time 2.5 sec
epoch 4, loss 0.1737, train acc 0.739, test acc 0.742, time 2.6 sec
epoch 5, loss 0.1277, train acc 0.755, test acc 0.755, time 2.6 sec


# 5.6 AlexNet

In [117]:
import time
import torch
from torch import nn,optim
import torchvision
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")

In [119]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet,self).__init__()
        self.conv=nn.Sequential(
            nn.Conv2d(1,96,11,4),
            nn.ReLU(),
            nn.MaxPool2d(3,2),
            nn.Conv2d(96,256,5,1,2),
            nn.ReLU(),
            nn.MaxPool2d(3,2),
            nn.Conv2d(256,384,3,1,1),
            nn.ReLU(),
            nn.Conv2d(384,384,3,1,1),
            nn.ReLU(),
            nn.Conv2d(384,256,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(3,2)
        )
        self.fc=nn.Sequential(
            nn.Linear(256*5*5,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,10)
        
        )
    def forward(self,img):
        a=self.conv(img)
        return self.fc(a.view(img.shape[0],-1))

In [120]:
net=AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (

In [121]:
batch_size=128
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size,resize=224)

In [122]:
lr,num_epochs=0.001,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)
d2l.train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1, loss 0.6425, train acc 0.755, test acc 0.846, time 33.9 sec
epoch 2, loss 0.1767, train acc 0.868, test acc 0.881, time 34.6 sec
epoch 3, loss 0.1008, train acc 0.887, test acc 0.891, time 34.7 sec
epoch 4, loss 0.0692, train acc 0.898, test acc 0.893, time 34.7 sec
epoch 5, loss 0.0500, train acc 0.907, test acc 0.902, time 34.4 sec


# 5.7 VGG

In [123]:
import time
import torch
from torch import nn,optim
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")

In [206]:
def block(num_convs,in_channels,out_channels):

    blk=[]
    for i in range(num_convs):
        if i==0:
            blk.append(nn.Conv2d(in_channels,out_channels,kernel_size=3,padding=1))
        else:
            blk.append(nn.Conv2d(out_channels,out_channels,kernel_size=3,padding=1))
        blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(kernel_size=2,stride=2))
    b=nn.Sequential(*blk)
    return b

In [207]:
class vgg_block(nn.Module):
    def __init__(self,num_convs,fc_features,fc_hidden_units):
        super(vgg_block,self).__init__()
        
        self.net=nn.Sequential()
        for j ,(num,i,o) in enumerate(num_convs):
            self.net.add_module('block_'+str(j+1),block(num,i,o))
        self.net.add_module("fc",nn.Sequential(
            d2l.FlattenLayer(),
            nn.Linear(fc_features,fc_hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden_units,fc_hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden_units,10)
        ))
        
        

    def forward(self,img):
        return self.net(img)
        

In [208]:
conv=((1,1,64),(1,64,128),(2,128,256),(2,256,512),(2,512,512))
fc_features=512*7*7
fc_hidden_units=4096

In [209]:
net=vgg_block(conv,fc_features,fc_hidden_units)

In [210]:
print(net)

vgg_block(
  (net): Sequential(
    (block_1): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (block_2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (block_3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (block_4): Sequential(
      (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      

In [211]:
x=torch.rand(1,1,224,224)
for n,b in net.named_children():
    for t1,t2 in b.named_children():
        x=t2(x)
        print(t1,'output shape: ',x.shape)

block_1 output shape:  torch.Size([1, 64, 112, 112])
block_2 output shape:  torch.Size([1, 128, 56, 56])
block_3 output shape:  torch.Size([1, 256, 28, 28])
block_4 output shape:  torch.Size([1, 512, 14, 14])
block_5 output shape:  torch.Size([1, 512, 7, 7])
fc output shape:  torch.Size([1, 10])


In [212]:
ratio=8
small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio),
(2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]
net=vgg_block(small_conv_arch,fc_features//ratio,fc_hidden_units//ratio)

In [213]:
batch_size=64
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size,resize=224)
lr,num_epochs=0.001,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)
net.to(torch.device("cuda"))
d2l.train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1, loss 0.5973, train acc 0.777, test acc 0.873, time 36.7 sec
epoch 2, loss 0.1652, train acc 0.879, test acc 0.895, time 36.9 sec
epoch 3, loss 0.0946, train acc 0.898, test acc 0.895, time 36.8 sec
epoch 4, loss 0.0628, train acc 0.910, test acc 0.914, time 36.9 sec
epoch 5, loss 0.0460, train acc 0.916, test acc 0.916, time 36.9 sec


# 5.8 NiN

In [214]:
import time
import torch
from torch import nn,optim
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")

In [215]:
def nin_block(in_channels,out_channels,kernel_size,stride,padding):
    blk=nn.Sequential(
        nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding),
        nn.ReLU(),
        nn.Conv2d(out_channels,out_channels,kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(out_channels,out_channels,kernel_size=1),
        nn.ReLU()
    )
    return blk


In [224]:
class nin(nn.Module):
    def __init__(self):
        super(nin,self).__init__()
        self.net=nn.Sequential(
            nin_block(1,96,11,4,0),
            nn.MaxPool2d(kernel_size=3,stride=2),
            nin_block(96,256,5,1,2),
            nn.MaxPool2d(kernel_size=3,stride=2),
            nin_block(256,384,3,1,1),
            nn.MaxPool2d(kernel_size=3,stride=2),
            nn.Dropout(0.5),
            nin_block(384,10,3,1,1),
            d2l.GlobalAvgPool2d(),
            d2l.FlattenLayer()
        )
    def forward(self,img):
        return self.net(img)

In [294]:
net=nin()
print(net)

nin(
  (net): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
      (1): ReLU()
      (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
      (3): ReLU()
      (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
      (5): ReLU()
    )
    (1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Sequential(
      (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): ReLU()
      (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
      (3): ReLU()
      (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
      (5): ReLU()
    )
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
      (3): ReLU()
      (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1

In [295]:
x=torch.rand(1,1,224,224)
x.to(torch.device("cuda"))
for n,b in net.named_children():
    for t1,t2 in b.named_children():
        x=t2(x)
        print(t1,'output shape: ',x.shape)

0 output shape:  torch.Size([1, 96, 54, 54])
1 output shape:  torch.Size([1, 96, 26, 26])
2 output shape:  torch.Size([1, 256, 26, 26])
3 output shape:  torch.Size([1, 256, 12, 12])
4 output shape:  torch.Size([1, 384, 12, 12])
5 output shape:  torch.Size([1, 384, 5, 5])
6 output shape:  torch.Size([1, 384, 5, 5])
7 output shape:  torch.Size([1, 10, 5, 5])
8 output shape:  torch.Size([1, 10, 1, 1])
9 output shape:  torch.Size([1, 10])


In [228]:
batch_size=128
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size,resize=224)
lr,num_epochs=0.002,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)
d2l.train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1, loss 1.3338, train acc 0.525, test acc 0.765, time 41.7 sec
epoch 2, loss 0.2932, train acc 0.788, test acc 0.810, time 41.9 sec
epoch 3, loss 0.1671, train acc 0.814, test acc 0.816, time 42.0 sec
epoch 4, loss 0.1146, train acc 0.829, test acc 0.831, time 42.1 sec
epoch 5, loss 0.0862, train acc 0.839, test acc 0.838, time 42.1 sec


# 5.9 GoogLeNet

In [75]:
import time
import torch
from torch import nn,optim
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")

In [76]:
class Inception(nn.Module):
    def __init__(self,in_,c1,c2,c3,c4):
        super(Inception,self).__init__()
        self.p1_1=nn.Conv2d(in_,c1,kernel_size=1)
        self.p2_1=nn.Conv2d(in_,c2[0],kernel_size=1)
        self.p2_2=nn.Conv2d(c2[0],c2[1],kernel_size=3,padding=1)
        self.p3_1=nn.Conv2d(in_,c3[0],kernel_size=1)
        self.p3_2=nn.Conv2d(c3[0],c3[1],kernel_size=5,padding=2)
        self.p4_1=nn.MaxPool2d(kernel_size=3,stride=1,padding=1)
        self.p4_2=nn.Conv2d(in_,c4,kernel_size=1)
        self.p1=nn.Sequential(
            self.p1_1,
            nn.ReLU()
        )
        self.p2=nn.Sequential(
            self.p2_1,
            nn.ReLU(),
            self.p2_2,
            nn.ReLU()
        )
        self.p3=nn.Sequential(
            self.p3_1,
            nn.ReLU(),
            self.p3_2,
            nn.ReLU()
        )
        self.p4=nn.Sequential(
            self.p4_1,
            self.p4_2,
            nn.ReLU()
        )
    def forward(self,x):
        result1=self.p1(x)
        result2=self.p2(x)
        result3=self.p3(x)
        result4=self.p4(x)
        return torch.cat((result1,result2,result3,result4),dim=1)

In [107]:
class Googlenet(nn.Module):
    def __init__(self):
        super(Googlenet,self).__init__()
        
        self.b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        
        self.b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
        nn.Conv2d(64, 192, kernel_size=3, padding=1),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        
        self.b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
        Inception(256, 128, (128, 192), (32, 96), 64),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        
        self.b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
        Inception(512, 160, (112, 224), (24, 64), 64),
        Inception(512, 128, (128, 256), (24, 64), 64),
        Inception(512, 112, (144, 288), (32, 64), 64),
        Inception(528, 256, (160, 320), (32, 128), 128),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        
        self.b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
        Inception(832, 384, (192, 384), (48, 128), 128),
        d2l.GlobalAvgPool2d())
        
        
        #self.t=nn.Sequential(
        #    self.b1, self.b2, self.b3, self.b4, self.b5,
        #    d2l.FlattenLayer(), 
        #    nn.Linear(1024, 10))

    def forward(self,img):
        img=img.to(torch.device("cuda"))
        t = nn.Sequential(
             self.b1, self.b2, self.b3, self.b4, self.b5,
             d2l.FlattenLayer(), 
             nn.Linear(1024, 10)
        )
        t=t.to(torch.device("cuda"))
        #x = self.b1(img)
        #x = self.b2(x)
        #x = self.b3(x)
        #x = self.b4(x)
        #x = self.b5(x)
        return t(img)
        

        

In [112]:
net=Googlenet()
print(net)

Googlenet(
  (b1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (b2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (b3): Sequential(
    (0): Inception(
      (p1_1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (p2_1): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
      (p2_2): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (p3_1): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1))
      (p3_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
      (p4_2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1)

In [109]:
x = torch.rand(1, 1, 96, 96)
for a in net.children():
    print(a)
    x=a(x)

    print(x.shape)

Sequential(
  (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
torch.Size([1, 64, 24, 24])
Sequential(
  (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  (1): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
torch.Size([1, 192, 12, 12])
Sequential(
  (0): Inception(
    (p1_1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
    (p2_1): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
    (p2_2): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (p3_1): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1))
    (p3_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
    (p4_2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1))
    (p1)

In [86]:
x = torch.rand(1, 1, 96, 96)
x=x.to(torch.device("cuda"))
net(x)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [110]:
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)


training on  cuda
epoch 1, loss 2.3029, train acc 0.101, test acc 0.102, time 32.6 sec
epoch 2, loss 1.1514, train acc 0.102, test acc 0.100, time 33.0 sec
epoch 3, loss 0.7676, train acc 0.099, test acc 0.103, time 32.7 sec
epoch 4, loss 0.5757, train acc 0.100, test acc 0.097, time 33.8 sec
epoch 5, loss 0.4606, train acc 0.100, test acc 0.100, time 33.2 sec


In [66]:
for i,j in net.named_children():
    print(i)

b1
b2
b3
b4
b5
t


# 5.10 批量归一化

In [300]:
net=nn.Sequential(
    nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
    nn.BatchNorm2d(6),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2), # kernel_size, stride
    nn.Conv2d(6, 16, 5),
    nn.BatchNorm2d(16),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2),
    d2l.FlattenLayer(),
    nn.Linear(16*4*4, 120),
    nn.BatchNorm1d(120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.BatchNorm1d(84),
    nn.Sigmoid(),
    nn.Linear(84, 10)

)

In [301]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)


training on  cuda
epoch 1, loss 0.9693, train acc 0.793, test acc 0.837, time 3.0 sec
epoch 2, loss 0.2279, train acc 0.863, test acc 0.832, time 3.0 sec
epoch 3, loss 0.1231, train acc 0.876, test acc 0.870, time 2.9 sec
epoch 4, loss 0.0829, train acc 0.886, test acc 0.871, time 2.8 sec
epoch 5, loss 0.0619, train acc 0.892, test acc 0.850, time 2.9 sec


# 5.11 ResNet

In [302]:
import time
import torch
from torch import nn,optim
import torch.nn.functional as F
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
device=torch.device("cuda")


In [329]:
class residual(nn.Module):
    def __init__(self,in_,out_,use11=False,stride=1):
        super(residual,self).__init__()
        self.c1=nn.Conv2d(in_,out_,kernel_size=3,padding=1,stride=stride)
        self.c2=nn.Conv2d(out_,out_,kernel_size=3,padding=1)
        self.b1=nn.BatchNorm2d(out_)
        self.b2=nn.BatchNorm2d(out_)
        self.net1=nn.Sequential(
            self.c1,
            self.b1,
            nn.ReLU()
        )
        if use11:
            self.net2=nn.Conv2d(in_, out_, kernel_size=1, stride=stride)
        else:
            self.net2=None

    def forward(self,img):
        r1=self.net1(img)
        if self.net2:
            r2=self.net2(img)
        else:
            r2=img
        return F.relu(r2+r1)

In [330]:
blk=residual(3,3)
x=torch.rand((4,3,6,6))
blk(x).shape

torch.Size([4, 3, 6, 6])

In [331]:
blk=residual(3,6,True,stride=2)
blk(x).shape

torch.Size([4, 6, 3, 3])

In [332]:
def resnet_block(in_,out_,num_,first_=False):
    if first_:
        assert in_ == out_
    blk=[]
    for i in range(num_):
        if i==0 and not first_:
            blk.append(residual(in_,out_,True,2))
        else:
            blk.append(residual(out_,out_))
    return nn.Sequential(*blk)

In [334]:
net= nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    resnet_block(64, 64, 2, first_=True),
    resnet_block(64,128,2),
    resnet_block(128,256,2),
    resnet_block(256,512,2),
    d2l.GlobalAvgPool2d(),
    d2l.FlattenLayer(),
    nn.Linear(512,10)
)

In [336]:
x=torch.rand((1,1,224,224))
for n,l in net.named_children():
    x=l(x)
    print(n,"output shape ",x.shape)

0 output shape  torch.Size([1, 64, 112, 112])
1 output shape  torch.Size([1, 64, 112, 112])
2 output shape  torch.Size([1, 64, 112, 112])
3 output shape  torch.Size([1, 64, 56, 56])
4 output shape  torch.Size([1, 64, 56, 56])
5 output shape  torch.Size([1, 128, 28, 28])
6 output shape  torch.Size([1, 256, 14, 14])
7 output shape  torch.Size([1, 512, 7, 7])
8 output shape  torch.Size([1, 512, 1, 1])
9 output shape  torch.Size([1, 512])
10 output shape  torch.Size([1, 10])


In [337]:
batch_size = 256

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)


training on  cuda
epoch 1, loss 0.3562, train acc 0.869, test acc 0.894, time 18.4 sec
epoch 2, loss 0.1133, train acc 0.916, test acc 0.881, time 18.5 sec
epoch 3, loss 0.0606, train acc 0.934, test acc 0.919, time 18.5 sec
epoch 4, loss 0.0385, train acc 0.943, test acc 0.920, time 18.6 sec
epoch 5, loss 0.0258, train acc 0.953, test acc 0.920, time 18.6 sec


# 5.12 DenseNet

In [374]:
def block(in_,out_):
    r=nn.Sequential(
        nn.BatchNorm2d(in_),
        nn.ReLU(),
        nn.Conv2d(in_,out_,kernel_size=3,padding=1)
    )
    return r

In [375]:
class denseblock(nn.Module):
    def __init__(self,num_,in_,out_):
        super(denseblock,self).__init__()
        net=[]
        for i in range(num_):
            in_c=in_+i*out_
            net.append(block(in_c,out_))
        self.nett=nn.ModuleList(net)
        self.out_channels=in_+num_*out_
    def forward(self,x):
        for blk in self.nett:
            y=blk(x)
            x=torch.cat((x,y),dim=1)
        return x

In [376]:
blk=denseblock(2,3,10)
x=torch.rand(4,3,8,8)
y=blk(x)
y.shape

torch.Size([4, 23, 8, 8])

In [385]:
def transition_block(in_,out_):
    blk=nn.Sequential(
        nn.BatchNorm2d(in_),
        nn.ReLU(),
        nn.Conv2d(in_,out_,kernel_size=1),
        nn.AvgPool2d(kernel_size=2,stride=2)
    )
    return blk

In [387]:
b=transition_block(23,10)
b(y).shape

torch.Size([4, 10, 4, 4])

In [390]:
net = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
)


In [391]:
num_,growth_=64,32
num_c=[4,4,4,4]

In [395]:
num_channels, growth_rate = 64, 32
 # num_channels为当前的通道数
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    DB = denseblock(num_convs, num_channels, growth_rate)
    net.add_module("DenseBlosk_%d" % i, DB)
    # 上一个稠密块的输出通道数
    num_channels = DB.out_channels
    # 在稠密块之间加入通道数减半的过渡层
    if i != len(num_convs_in_dense_blocks) - 1:
        net.add_module("transition_block_%d" % i, transition_block(num_channels, num_channels //
        2))
        num_channels = num_channels // 2


In [397]:
net.add_module("BN", nn.BatchNorm2d(num_channels))
net.add_module("relu", nn.ReLU())
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())
net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10)))


In [399]:
X = torch.rand((1, 1, 96, 96))
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape:\t', X.shape)


0  output shape:	 torch.Size([1, 64, 48, 48])
1  output shape:	 torch.Size([1, 64, 48, 48])
2  output shape:	 torch.Size([1, 64, 48, 48])
3  output shape:	 torch.Size([1, 64, 24, 24])
DenseBlosk_0  output shape:	 torch.Size([1, 192, 24, 24])
transition_block_0  output shape:	 torch.Size([1, 96, 12, 12])
DenseBlosk_1  output shape:	 torch.Size([1, 224, 12, 12])
transition_block_1  output shape:	 torch.Size([1, 112, 6, 6])
DenseBlosk_2  output shape:	 torch.Size([1, 240, 6, 6])
transition_block_2  output shape:	 torch.Size([1, 120, 3, 3])
DenseBlosk_3  output shape:	 torch.Size([1, 248, 3, 3])
BN  output shape:	 torch.Size([1, 248, 3, 3])
relu  output shape:	 torch.Size([1, 248, 3, 3])
global_avg_pool  output shape:	 torch.Size([1, 248, 1, 1])
fc  output shape:	 torch.Size([1, 10])


In [400]:
size = 256
# 如出现“out of memory”的报错信息,可减小batch_size或resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)


training on  cuda
epoch 1, loss 0.4580, train acc 0.838, test acc 0.830, time 21.5 sec
epoch 2, loss 0.1351, train acc 0.902, test acc 0.849, time 21.5 sec
epoch 3, loss 0.0772, train acc 0.916, test acc 0.893, time 21.6 sec
epoch 4, loss 0.0519, train acc 0.925, test acc 0.903, time 21.7 sec
epoch 5, loss 0.0382, train acc 0.930, test acc 0.914, time 21.6 sec
