In [10]:
import numpy as np
import mnist

class Conv3x3:
    '''
    A convolution layer using 3x3 filters, valid padding and stride 1.

    Filters are stored channels-last, with shape
    (num_filters, 3, 3, input_depth), so that every filter lines up
    element-wise with a (3, 3, input_depth) patch of the input image.
    '''

    def __init__(self, num_filters, input_depth):
        '''
        Creates the layer with randomly initialised filters.
        - num_filters is the number of filters (output channels).
        - input_depth is the number of channels of the images given to forward().
        '''
        self.num_filters = num_filters
        self.input_depth = input_depth

        # filters is a 4d array with dimensions (num_filters, 3, 3, input_depth).
        # We divide by 9 to reduce the variance of the initial values.
        self.filters = np.random.randn(num_filters, 3, 3, input_depth) / 9

    def iterate_regions(self, image):
        '''
        Generates all possible 3x3 image regions using valid padding.
        Yields (im_region, i, j) where im_region is the (3, 3, depth) patch
        whose top-left corner is at row i, column j.
        - image is a 3d numpy array.
        '''
        h, w, _ = image.shape

        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j

    def forward(self, input):
        '''
        Performs a forward pass of the conv layer using the given input.
        Returns a 3d numpy array with dimensions (h - 2, w - 2, num_filters).
        Raises ValueError if input is not of shape (h, w, input_depth).
        - input is a 3d numpy array with dimensions (h, w, input_depth).
        '''
        # Without this check a channel mismatch where one side has depth 1
        # would broadcast silently and produce wrong activations.
        if input.ndim != 3 or input.shape[2] != self.input_depth:
            raise ValueError(
                'expected input of shape (h, w, %d), got %s'
                % (self.input_depth, input.shape)
            )

        # Cached for backprop().
        self.last_input = input

        h, w, _ = input.shape
        output = np.zeros((h - 2, w - 2, self.num_filters))

        for im_region, i, j in self.iterate_regions(input):
            # (3, 3, d) * (f, 3, 3, d) broadcasts to (f, 3, 3, d); summing the
            # last three axes leaves one value per filter.
            output[i, j] = np.sum(im_region * self.filters, axis=(1, 2, 3))

        return output

    def backprop(self, d_L_d_out, learn_rate):
        '''
        Performs a backward pass of the conv layer and updates the filters.
        Returns the loss gradient for this layer's input, with the same shape
        as the input of the most recent forward() call. forward() must have
        been called first.
        - d_L_d_out is the loss gradient for this layer's outputs.
        - learn_rate is a float.
        '''
        d_L_d_filters = np.zeros(self.filters.shape)
        d_L_d_input = np.zeros(self.last_input.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            # One upstream gradient value per filter at this output position.
            grad = d_L_d_out[i, j]

            # Every filter saw the same patch, scaled by its own gradient.
            d_L_d_filters += grad[:, None, None, None] * im_region

            # The patch receives each filter weighted by that filter's
            # gradient: contract over the filter axis -> (3, 3, input_depth).
            d_L_d_input[i:i + 3, j:j + 3] += np.tensordot(grad, self.filters, axes=1)

        # Update filters (after the input gradient was computed with the old ones).
        self.filters -= learn_rate * d_L_d_filters

        return d_L_d_input



In [21]:
from maxpool import MaxPool2
from softmax import Softmax

# Work on the first 1000 samples of each MNIST split only.
train_images, train_labels = mnist.train_images()[:1000], mnist.train_labels()[:1000]
test_images, test_labels = mnist.test_images()[:1000], mnist.test_labels()[:1000]

# Network: conv (8 filters) -> pool -> conv (16 filters) -> pool -> softmax.
# The layers are created in this order on purpose: Conv3x3 draws its initial
# filters from np.random, so the order fixes which random values each gets.
conv1 = Conv3x3(8, 1)
pool1 = MaxPool2()
conv2 = Conv3x3(16, 8)
pool2 = MaxPool2()
# 400 inputs = the flattened (5, 5, 16) output of pool2; 10 outputs.
softmax = Softmax(16 * 5 * 5, 10)

image, label = train_images[0], train_labels[0]

# Conv3x3 unpacks a 3d shape, so give the image a trailing channel axis.
image = image[:, :, None]
print(image.shape)

# Forward pass, printing the shape after each conv + pool stage.
# Pixel values are scaled by 1/255 and shifted by -0.5 first.
out = pool1.forward(conv1.forward(image / 255 - 0.5))
print("first conv and pool output shape: ", out.shape)
out = pool2.forward(conv2.forward(out))
print("second conv and pool output shape: ", out.shape)
out = softmax.forward(out)
print("softmax output shape: ", out.shape)

# Backward pass shape check. Conv3x3.backprop applies a filter update scaled
# by lr, so this check already changes the conv filters slightly.
lr = 0.001
# Gradient of -log(out[label]) with respect to out: non-zero only at `label`.
gradient = np.zeros(10)
gradient[label] = -1 / out[label]
gradient = softmax.backprop(gradient, lr)
print("softmax backprop output shape: ", gradient.shape)
gradient = conv2.backprop(pool2.backprop(gradient), lr)
print("second conv backprop output shape: ", gradient.shape)
gradient = conv1.backprop(pool1.backprop(gradient), lr)
print("first conv backprop output shape: ", gradient.shape)

(28, 28, 1)
first conv and pool output shape:  (13, 13, 8)
second conv and pool output shape:  (5, 5, 16)
softmax output shape:  (10,)
softmax backprop output shape:  (5, 5, 16)
second conv backprop output shape:  (13, 13, 8)
first conv backprop output shape:  (28, 28, 1)


In [23]:
def forward(image, label):
    '''
    Runs one image through the whole network (conv1, pool1, conv2, pool2,
    softmax) and scores the result against label.
    Returns (out, loss, acc):
    - out is the output of the softmax layer.
    - loss is -log(out[label]).
    - acc is 1 if the largest entry of out is at index label, else 0.
    - image is a 2d array; a channel axis is appended here.
    - label is the index of the correct class.
    '''
    # Append the channel axis, then scale by 1/255 and shift by -0.5.
    out = image[:, :, None] / 255 - 0.5

    # Feed the activations through every layer in order.
    for layer in (conv1, pool1, conv2, pool2, softmax):
        out = layer.forward(out)

    loss = -np.log(out[label])
    acc = int(np.argmax(out) == label)

    return out, loss, acc

def train(im, label, lr=.005):
    '''
    Performs one training step on a single image: a forward pass followed by
    a backward pass through every layer.
    Returns (loss, acc) as computed by forward() before the update.
    - im is the image, label the index of its correct class.
    - lr is the learning rate handed to each layer's backprop.
    '''
    out, loss, acc = forward(im, label)

    # Gradient of -log(out[label]) with respect to out: zero everywhere
    # except at the correct class.
    d_out = np.zeros(10)
    d_out[label] = -1 / out[label]

    # Walk the layers in reverse; the pooling layers take no learning rate.
    d_out = softmax.backprop(d_out, lr)
    d_out = conv2.backprop(pool2.backprop(d_out), lr)
    conv1.backprop(pool1.backprop(d_out), lr)

    return loss, acc

print('MNIST CNN initialized!')

# Train the model for 3 passes over the training subset.
for epoch in range(3):
    print('--- Epoch %d ---' % (epoch + 1))

    # Shuffle images and labels with the same permutation so pairs stay aligned.
    permutation = np.random.permutation(len(train_images))
    train_images = train_images[permutation]
    train_labels = train_labels[permutation]

    # Running totals over the current window of 100 steps.
    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        l, acc = train(im, label)
        loss += l
        num_correct += acc

        # Report AFTER the step so that each window holds exactly the 100
        # samples ending at step i + 1. Reporting before the step made the
        # first window 99 samples (still divided by 100) and never reported
        # the last sample of an epoch.
        if (i + 1) % 100 == 0:
            # The window is 100 steps, so num_correct is already a percentage.
            print(
                '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, loss / 100, num_correct)
            )
            loss = 0
            num_correct = 0

MNIST CNN initialized!
--- Epoch 1 ---
[Step 100] Past 100 steps: Average Loss 2.260 | Accuracy: 15%
[Step 200] Past 100 steps: Average Loss 2.159 | Accuracy: 29%
[Step 300] Past 100 steps: Average Loss 1.659 | Accuracy: 51%
[Step 400] Past 100 steps: Average Loss 1.234 | Accuracy: 65%
[Step 500] Past 100 steps: Average Loss 1.027 | Accuracy: 64%
[Step 600] Past 100 steps: Average Loss 0.771 | Accuracy: 78%
[Step 700] Past 100 steps: Average Loss 0.598 | Accuracy: 86%
[Step 800] Past 100 steps: Average Loss 0.702 | Accuracy: 79%
[Step 900] Past 100 steps: Average Loss 0.754 | Accuracy: 80%
[Step 1000] Past 100 steps: Average Loss 0.547 | Accuracy: 80%
--- Epoch 2 ---
[Step 100] Past 100 steps: Average Loss 0.428 | Accuracy: 86%
[Step 200] Past 100 steps: Average Loss 0.482 | Accuracy: 86%
[Step 300] Past 100 steps: Average Loss 0.431 | Accuracy: 88%
[Step 400] Past 100 steps: Average Loss 0.378 | Accuracy: 90%
[Step 500] Past 100 steps: Average Loss 0.401 | Accuracy: 87%
[Step 600] Pas

In [9]:
# Broadcasting demo: multiplying a (3, 3) array with a (2, 3, 3) array applies
# the (3, 3) array to each of the two stacked matrices.
a = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] * 2)  # two copies of the same 3x3 matrix
print(a.shape)
print(a)
b = np.array(list(range(9))).reshape((3, 3))  # 0..8 laid out row by row
print(b.shape)
print(b)
print("------------------")
# Same shapes: plain element-wise product.
print(b * a[0])
# (3, 3) against (2, 3, 3): b is broadcast across the leading axis.
print(b * a)

(2, 3, 3)
[[[1 2 3]
  [4 5 6]
  [7 8 9]]

 [[1 2 3]
  [4 5 6]
  [7 8 9]]]
(3, 3)
[[0 1 2]
 [3 4 5]
 [6 7 8]]
------------------
[[ 0  2  6]
 [12 20 30]
 [42 56 72]]
[[[ 0  2  6]
  [12 20 30]
  [42 56 72]]

 [[ 0  2  6]
  [12 20 30]
  [42 56 72]]]
