Gradient-based learning applied to document recognition, 1998

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN using sigmoid activations and average pooling.

    The convolutional stack is three unpadded 5x5 convolutions with two
    2x2 average pools in between. The first fully connected layer takes
    120 input features, so the convolutional output must flatten to 120
    values per sample (e.g. a 1x32x32 input is reduced to 120x1x1).

    Args:
        num_class: Number of output classes (size of the last layer).
    """

    def __init__(self, num_class):
        # nn.Module has to be initialised before any sub-module is assigned
        # as an attribute; without this call the assignments below raise
        # AttributeError.
        super(LeNet5, self).__init__()

        self.cnn_level = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Sigmoid()
        )

        self.fc_level = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.Sigmoid(),
            nn.Linear(in_features=84, out_features=num_class)
        )

    def forward(self, x):
        """Return per-class probabilities for a batch of images.

        Args:
            x: Batched input tensor of shape (N, 1, H, W) whose spatial size
                reduces to 1x1 after the convolutional stack (32x32 works).

        Returns:
            Tensor of shape (N, num_class); each row is a softmax
            distribution over the classes.
        """
        x = self.cnn_level(x)
        # Flatten everything except the batch dimension. Flattening from
        # dim 0 would merge all samples into one vector and break both the
        # linear layers and the softmax over dim=1.
        x = torch.flatten(x, start_dim=1)
        x = self.fc_level(x)
        probs = F.softmax(x, dim=1)
        return probs

아래처럼 단계적으로 진행해보자:

In [10]:
# primitive method
def LeNet_primitive(image, num_classes):
    """Run `image` through freshly created LeNet layers, printing each size.

    No activation functions are applied; the function only chains the
    layers so that the tensor size after every stage can be inspected.

    Args:
        image: Input batch; the first convolution expects 1 input channel.
        num_classes: Number of output features of the final linear layer.

    Returns:
        The output of the final linear layer.
    """
    # Parameterised layers are instantiated in the order
    # C1 -> C3 -> C5 -> F6 -> classifier so that, under a fixed RNG seed,
    # their random initialisation is drawn in a fixed sequence.
    conv_c1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
    pool_s2 = nn.AvgPool2d(kernel_size=2, stride=2)  # sub-sampling == pooling
    conv_c3 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
    pool_s4 = nn.AvgPool2d(kernel_size=2, stride=2)
    conv_c5 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)
    dense_f6 = nn.Linear(120, 84)
    to_vector = nn.Flatten()
    output_layer = nn.Linear(84, num_classes)

    # Order in which the layers are applied to the input.
    pipeline = (
        conv_c1,
        pool_s2,
        conv_c3,
        pool_s4,
        conv_c5,
        to_vector,
        dense_f6,
        output_layer,
    )

    # Show how the tensor size changes after every layer.
    activations = image
    for stage in pipeline:
        activations = stage(activations)
        print(activations.size())

    return activations

In [11]:
# Seed the RNG first so that both the random input below and the layer
# initialisation inside LeNet_primitive are reproducible.
torch.manual_seed(42)
# Dummy batch: 64 single-channel 32x32 images.
image = torch.randn(64, 1, 32, 32)
num_classes = 10
# Runs the step-by-step network; it prints the tensor size after every layer.
output = LeNet_primitive(image, num_classes)

torch.Size([64, 6, 28, 28])
torch.Size([64, 6, 14, 14])
torch.Size([64, 16, 10, 10])
torch.Size([64, 16, 5, 5])
torch.Size([64, 120, 1, 1])
torch.Size([64, 120])
torch.Size([64, 84])
torch.Size([64, 10])


In [12]:
# Show the raw (un-normalised) scores produced for the first sample in the batch.
output[0]

tensor([-0.0533, -0.0760,  0.0290, -0.0205,  0.0147, -0.0804, -0.0519, -0.1315,
        -0.1492,  0.0654], grad_fn=<SelectBackward0>)

In [3]:
# LeNet architecture implemented as an nn.Module subclass
class LeNet(nn.Module):
    """LeNet-style CNN with ReLU activations and 2x2 average pooling.

    Three unpadded 5x5 convolutions (1 -> 6 -> 16 -> 120 channels) are
    followed by two linear layers (120 -> 84 -> 10). A single ReLU module
    and a single pooling module are shared by all stages.
    """

    def __init__(self):
        super().__init__()
        # Parameter-free modules, reused at every stage of forward().
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0)
        # Fully connected head.
        self.linear1 = nn.Linear(in_features=120, out_features=84)
        self.linear2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch `x`."""
        features = self.pool(self.relu(self.conv1(x)))
        features = self.pool(self.relu(self.conv2(features)))
        features = self.relu(self.conv3(features))
        # Collapse all non-batch dimensions: N x 120 x 1 x 1 -> N x 120.
        flat = features.reshape(features.shape[0], -1)
        hidden = self.relu(self.linear1(flat))
        logits = self.linear2(hidden)
        return logits

In [4]:
# Smoke test: push a random batch of 64 single-channel 32x32 inputs through
# the model and look at the size of the result.
image = torch.randn((64, 1, 32, 32))
model = LeNet()
# Only the output size is inspected, so gradient tracking is switched off.
with torch.no_grad():
    out = model(image)
out.size()

torch.Size([64, 10])

`nn.Sequential()` 을 써보자:

In [None]:
# Same network, built with nn.Sequential().
# Grouping into blocks as below is optional; the layers could just as well
# be listed one after another in a single sequence.
class LeNet(nn.Module):
    """LeNet-style CNN assembled from three nn.Sequential blocks and a head.

    Blocks: C1+S2 (conv 1->6, ReLU, avg-pool), C3+S4 (conv 6->16, ReLU,
    avg-pool), C5+F6 (conv 16->120, ReLU, flatten, linear 120->84, ReLU),
    followed by a linear classifier 84->10.
    """

    def __init__(self):
        super().__init__()

        self.c1_s2_block = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2)),
        )

        self.c3_s4_block = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2)),
        )

        self.c5_f6_block = nn.Sequential(
            nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
        )

        self.classifier = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Apply the three blocks and the classifier in order to `x`."""
        stages = (
            self.c1_s2_block,
            self.c3_s4_block,
            self.c5_f6_block,
            self.classifier,
        )
        for stage in stages:
            x = stage(x)
        return x

In [7]:
# Smoke test for the nn.Sequential version: feed a random batch of 64
# single-channel 32x32 inputs and look at the size of the result.
image = torch.randn((64, 1, 32, 32))
model = LeNet()
# Only the output size is inspected, so gradient tracking is switched off.
with torch.no_grad():
    out = model(image)
out.size()

torch.Size([64, 10])