In [None]:
import torch
from torch import nn

In [None]:
class WiderBlock(nn.Module):
    """Pre-activation residual block with dropout between its two 3x3 convs.

    Residual branch: BN -> ReLU -> Conv3x3(stride) -> BN -> ReLU -> Dropout -> Conv3x3.
    Shortcut branch: ``projection(x)`` when a projection module is supplied,
    otherwise the input itself.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first 3x3 convolution; the second one uses 1.
        projection: Optional module applied to ``x`` on the shortcut path.
            ``None`` selects the identity shortcut.
        drop_p: Dropout probability (author's note: 0.3 for CIFAR, 0.4 for SVHN).
    """

    def __init__(self, in_channels, out_channels, stride=1, projection=None, drop_p=0.3):
        super().__init__()

        # Settings shared by both 3x3 convolutions of the residual branch.
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        branch = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout(drop_p),
            nn.Conv2d(out_channels, out_channels, **conv3x3),
        ]
        self.residual = nn.Sequential(*branch)
        self.projection = projection

    def forward(self, x):
        """Return ``residual(x) + shortcut(x)`` with no activation after the sum."""
        branch_out = self.residual(x)
        skip = x if self.projection is None else self.projection(x)
        # Note: a ReLU used to follow the addition, but this block applies
        # none after the sum (activations live at the start of the branch).
        return branch_out + skip

class WRN(nn.Module):
    """Wide residual network assembled from pre-activation ``WiderBlock``s.

    Layout: a 3x3 stem conv (3 -> 16 channels), three groups of blocks with
    widths ``16*k``, ``32*k`` and ``64*k`` (strides 1, 2, 2), then
    BN -> ReLU -> global average pooling -> linear classifier.

    Args:
        depth: Nominal network depth. Each group gets ``(depth - 4) // 6``
            blocks (but never fewer than one); depths that are not of the
            form ``6*N + 4`` are rounded down.
        k: Widening factor that multiplies the channel count of every group.
        num_classes: Size of the classifier output.
        init_weights: When true, apply Kaiming-normal init to convolutions,
            (1, 0) to BatchNorm weight/bias, and N(0, 0.01) to linear weights.
        drop_p: Dropout probability forwarded to every ``WiderBlock``
            (author's note on the block: 0.3 for CIFAR, 0.4 for SVHN).
    """

    def __init__(self, depth, k, num_classes=1000, init_weights=True, drop_p=0.3):
        super().__init__()
        # Blocks per group. Author's note: subtracting 2 instead of 4 looks
        # more natural, but the paper's 40-layer net with N=6 only adds up as
        # 6*2*3 + 4 = 40, so the projection convs are presumably counted too.
        N = int((depth - 4) // 6)
        # Running channel count; updated by make_layers as groups are built.
        self.in_channels = 16

        self.conv1 = nn.Conv2d(3, 16, 3, padding=1, bias=False)
        # Author's note: in pre-activation nets whose stem has a pooling layer,
        # the stem is conv-BN-ReLU-pool -> blocks (feeding raw input straight
        # into BN-ReLU would just be doing data preprocessing). Here there is no
        # pooling before the blocks, so the stem is a bare conv: a conv-BN-ReLU
        # stem would be followed by the block's own BN-ReLU, giving
        # BN-ReLU-BN-ReLU back to back.
        self.conv2 = self.make_layers(16 * k, N, stride=1, drop_p=drop_p)
        self.conv3 = self.make_layers(32 * k, N, stride=2, drop_p=drop_p)
        self.conv4 = self.make_layers(64 * k, N, stride=2, drop_p=drop_p)
        # Final BN-ReLU: the last block ends on a conv, not an activation.
        self.bn = nn.BatchNorm2d(64 * k)
        self.relu = nn.ReLU(inplace=True)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64 * k, num_classes)

        # weight initialization
        if init_weights:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map an image batch ``(B, 3, H, W)`` to logits ``(B, num_classes)``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return x

    def make_layers(self, out_channels, num_blocks, stride, drop_p=0.3):
        """Build one group of ``WiderBlock``s and advance ``self.in_channels``.

        Args:
            out_channels: Channel width of every block in the group.
            num_blocks: Requested number of blocks; the first block is always
                created, so values below 1 still yield a single block.
            stride: Stride of the first block (later blocks use stride 1).
            drop_p: Dropout probability used by every block in the group.

        Returns:
            ``nn.Sequential`` holding the blocks of the group.
        """
        if stride != 1 or self.in_channels != out_channels:
            # 1x1 conv to match shape on the shortcut. No BatchNorm after it:
            # omitted because this is a pre-activation design.
            projection = nn.Conv2d(self.in_channels, out_channels, 1, stride=stride, bias=False)
        else:
            projection = None

        # Only the first block changes shape, so only it receives the projection.
        layers = [WiderBlock(self.in_channels, out_channels, stride, projection, drop_p)]
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(WiderBlock(self.in_channels, out_channels, drop_p=drop_p))

        return nn.Sequential(*layers)

In [None]:
model = WRN(depth=28, k=10, num_classes=10)
# print(model)
!pip install torchinfo
from torchinfo import summary
summary(model, (2,3, 224, 224), device="cpu")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


Layer (type:depth-idx)                   Output Shape              Param #
WRN                                      [2, 10]                   --
├─Conv2d: 1-1                            [2, 16, 224, 224]         432
├─Sequential: 1-2                        [2, 160, 224, 224]        --
│    └─WiderBlock: 2-1                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-1              [2, 160, 224, 224]        253,792
│    │    └─Conv2d: 3-2                  [2, 160, 224, 224]        2,560
│    └─WiderBlock: 2-2                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-3              [2, 160, 224, 224]        461,440
│    └─WiderBlock: 2-3                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-4              [2, 160, 224, 224]        461,440
│    └─WiderBlock: 2-4                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-5              [2, 160, 224, 224]        461,440
├─Sequential: 1-3                        [2, 320, 112, 112]  

In [None]:
# Smoke test: a random batch of two 224x224 RGB images should map to (2, num_classes).
x = torch.randn(2, 3, 224, 224)
# Only the output shape is needed, so skip autograd bookkeeping; this avoids
# keeping every intermediate activation of the network alive.
with torch.no_grad():
    print(model(x).shape)

torch.Size([2, 10])
