# VGG16

In [1]:
import collections

import chainer
from chainer import functions as F
from chainer import initializers
from chainer import links as L

## モデル

VGG16は画像識別タスクで高いパフォーマンスを出したモデルである。  
畳み込み13層と全結合3層の計16層から成る畳み込みニューラルネットワークであり、詳細は以下のとおりである。  
（conv3-Xは畳み込みのカーネルサイズが3、特徴マップがX個であることを意味する。）

<img src="image/vgg16.png" style="width: 500px;">  
(引用 「Very Deep Convolutional Networks for Large-Scale Image Recognition」)

In [2]:
class VGG16(chainer.Chain):
    """VGG16 network: 13 convolution layers followed by 3 fully connected ones.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so the
    spatial size is only reduced by the five pooling stages.  ``fc6`` is
    created for ``512 * 7 * 7`` input features, which matches the size
    annotations in :meth:`orderd_functions` for 224x224 input images.

    Args:
        n_classes (int): Number of output units of the final layer ``fc8``.
    """

    def __init__(self, n_classes=1000):
        super(VGG16, self).__init__()

        # Initial values shared by every layer: weights drawn with
        # ``initializers.Normal(0.01)``, biases set to zero.
        kwargs = {
            'initialW': initializers.Normal(0.01),
            'initial_bias': initializers.Zero(),
        }

        with self.init_scope():
            # L.Convolution2D(in_channels, out_channels, ksize, stride, pad)
            self.conv1_1 = L.Convolution2D(3, 64, ksize=3, stride=1, pad=1, **kwargs)
            self.conv1_2 = L.Convolution2D(64, 64, ksize=3, stride=1, pad=1, **kwargs)

            self.conv2_1 = L.Convolution2D(64, 128, ksize=3, stride=1, pad=1, **kwargs)
            self.conv2_2 = L.Convolution2D(128, 128, ksize=3, stride=1, pad=1, **kwargs)

            self.conv3_1 = L.Convolution2D(128, 256, ksize=3, stride=1, pad=1, **kwargs)
            self.conv3_2 = L.Convolution2D(256, 256, ksize=3, stride=1, pad=1, **kwargs)
            self.conv3_3 = L.Convolution2D(256, 256, ksize=3, stride=1, pad=1, **kwargs)

            self.conv4_1 = L.Convolution2D(256, 512, ksize=3, stride=1, pad=1, **kwargs)
            self.conv4_2 = L.Convolution2D(512, 512, ksize=3, stride=1, pad=1, **kwargs)
            self.conv4_3 = L.Convolution2D(512, 512, ksize=3, stride=1, pad=1, **kwargs)

            self.conv5_1 = L.Convolution2D(512, 512, ksize=3, stride=1, pad=1, **kwargs)
            self.conv5_2 = L.Convolution2D(512, 512, ksize=3, stride=1, pad=1, **kwargs)
            self.conv5_3 = L.Convolution2D(512, 512, ksize=3, stride=1, pad=1, **kwargs)

            # L.Linear(in_size, out_size)
            self.fc6 = L.Linear(512 * 7 * 7, 4096, **kwargs)
            self.fc7 = L.Linear(4096, 4096, **kwargs)
            self.fc8 = L.Linear(4096, n_classes, **kwargs)

    @property
    def functions(self):
        """Ordered mapping from stage name to the callables of that stage.

        The mapping is rebuilt on every access instead of being cached in
        ``__init__``.  A cached dict holds references to the link objects it
        was built from, so a model produced by ``Chain.copy()`` would keep
        running the original model's links rather than its own copies.
        """
        return self.orderd_functions()

    def __call__(self, x):
        """Run ``x`` through every stage in order and return the result.

        Args:
            x: Input batch.  The layer sizes assume 3-channel 224x224 images
                laid out as ``(batch, 3, 224, 224)``.

        Returns:
            The output of the last stage (``prob``, i.e. ``F.softmax`` applied
            to the ``fc8`` output).
        """
        h = x
        for funcs in self.functions.values():
            for func in funcs:
                h = func(h)
        return h

    def orderd_functions(self):
        """Build the stage table used by the forward pass.

        The method name keeps its original spelling so existing callers
        continue to work.

        Returns:
            collections.OrderedDict: Stage name -> list of callables that are
            applied one after another.  The ``size`` comments give the spatial
            size of the feature map entering each group for a 224x224 input.
        """
        return collections.OrderedDict([
            # size: 224
            ('conv1_1', [self.conv1_1, F.relu]),
            ('conv1_2', [self.conv1_2, F.relu]),
            ('pool1', [_max_pooling_2d]),
            # size: 112
            ('conv2_1', [self.conv2_1, F.relu]),
            ('conv2_2', [self.conv2_2, F.relu]),
            ('pool2', [_max_pooling_2d]),
            # size: 56
            ('conv3_1', [self.conv3_1, F.relu]),
            ('conv3_2', [self.conv3_2, F.relu]),
            ('conv3_3', [self.conv3_3, F.relu]),
            ('pool3', [_max_pooling_2d]),
            # size: 28
            ('conv4_1', [self.conv4_1, F.relu]),
            ('conv4_2', [self.conv4_2, F.relu]),
            ('conv4_3', [self.conv4_3, F.relu]),
            ('pool4', [_max_pooling_2d]),
            # size: 14
            ('conv5_1', [self.conv5_1, F.relu]),
            ('conv5_2', [self.conv5_2, F.relu]),
            ('conv5_3', [self.conv5_3, F.relu]),
            ('pool5', [_max_pooling_2d]),
            # size: 7
            ('fc6', [self.fc6, F.relu, F.dropout]),
            ('fc7', [self.fc7, F.relu, F.dropout]),
            ('fc8', [self.fc8]),
            ('prob', [F.softmax]),
        ])
    
def _max_pooling_2d(x):
    """Apply max pooling with a 2x2 window to ``x``.

    Only ``ksize`` is specified; every other option of
    ``F.max_pooling_2d`` is left at its default.
    """
    pooled = F.max_pooling_2d(x, ksize=2)
    return pooled

In [3]:
# Build the network with the default number of classes (n_classes=1000).
vgg = VGG16()

In [4]:
import numpy as np
# Dummy batch for a smoke test: one 3-channel 224x224 image filled with ones.
img = np.ones((1, 3, 224, 224), dtype=np.float32)
# Forward pass through all stages of the model, ending with the softmax stage.
# NOTE(review): F.dropout is presumably active under Chainer's default train
# configuration, so the values may differ between runs -- confirm if the
# actual probabilities (not just the shape) are needed.
output = vgg(img)
# Expect one row per input image and one column per class.
print(output.shape)

(1, 1000)
