In [1]:
import numpy as np
from cnn_utils import *
from dlsuper.cnn import CNNLayer, PoolingLayer, CNNModel, CNNPipe
from dlsuper.nn.NNLayer import NNLinearLayer, NNActivator
from dlsuper.nn.NNPipe import NNPipe
from dlsuper.cnn import CNNModel


# Load the raw train/test splits. `load_dataset` and `convert_to_one_hot` are
# brought in by the star-import from cnn_utils.
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()

# Scale pixel values by 1/255 and move the first axis to the end.
# Presumably this turns (m, H, W, C) into (H, W, C, m) -- confirm against
# load_dataset; the training cell slices samples on the last axis.
X_train = (X_train_orig / 255.).transpose(1, 2, 3, 0)
X_test = (X_test_orig / 255.).transpose(1, 2, 3, 0)

# One-hot encode the labels. The result of convert_to_one_hot is used as-is:
# applying `.T` followed by `.transpose(1, 0)` to a 2-D array restores the
# original axis order, so that pair of calls is a no-op and is left out.
NUM_CLASSES = 6
Y_train = convert_to_one_hot(Y_train_orig, NUM_CLASSES)
Y_test = convert_to_one_hot(Y_test_orig, NUM_CLASSES)

In [2]:
import math
def _total_same_pad(in_size, filter_extent, stride):
    """Return the total padding needed along one axis for "same" mode."""
    remainder = in_size % stride
    # When the stride divides the input evenly the last window starts a full
    # stride before the end; otherwise it starts `remainder` before the end.
    covered = stride if remainder == 0 else remainder
    return max(filter_extent - covered, 0)


def compute_samemode_pad(in_width, in_height, filter_size, strides):
    """Compute per-side padding for a "same"-mode convolution or pooling.

    The total padding along each axis is chosen so that the output size is
    ceil(input_size / stride).

    Args:
        in_width: input width.
        in_height: input height.
        filter_size: (filter_width, filter_height).
        strides: (stride_width, stride_height).

    Returns:
        Tuple (pad_left, pad_right, pad_top, pad_bottom).

    Raises:
        ZeroDivisionError: if either stride is 0.
    """
    stride_width, stride_height = strides
    filter_width, filter_height = filter_size

    pad_along_height = _total_same_pad(in_height, filter_height, stride_height)
    pad_along_width = _total_same_pad(in_width, filter_width, stride_width)

    # Padding is split across the two sides of each axis. When the total is
    # odd, the bottom gets one more than the top and the right gets one more
    # than the left.
    # Note that this is different from existing libraries such as cuDNN and
    # Caffe, which explicitly specify the number of padded pixels and always
    # pad the same number of pixels on both sides.
    pad_top = pad_along_height // 2
    pad_bottom = pad_along_height - pad_top
    pad_left = pad_along_width // 2
    pad_right = pad_along_width - pad_left
    return (pad_left, pad_right, pad_top, pad_bottom)

In [3]:
# ---- Convolutional feature extractor ----
# Spatial sizes passed to compute_samemode_pad: 64 for the first conv/pool
# pair, 8 for the second pair.
first_stage_side = 64
second_stage_side = 8

# Conv 1: 4x4 kernel, stride 1, "relu". The trailing 3 in the filter shape and
# the 8 are presumably input channels and filter count -- confirm against
# CNNLayer's constructor.
conv1_kernel, conv1_stride = (4, 4), (1, 1)
pad = compute_samemode_pad(first_stage_side, first_stage_side, conv1_kernel, conv1_stride)
cnn_layer1 = CNNLayer.CNNLayer(None, conv1_kernel + (3,), 8, pad, conv1_stride, "relu")

# Pool 1: "max" pooling; the same (8, 8) tuple is used for both size arguments.
pool1_size = (8, 8)
pad = compute_samemode_pad(first_stage_side, first_stage_side, pool1_size, pool1_size)
pool_layer1 = PoolingLayer.PoolingLayer(cnn_layer1, pool1_size, pad, "max", pool1_size)

# Conv 2: 2x2 kernel, stride 1, "relu", chained after the first pooling layer.
conv2_kernel, conv2_stride = (2, 2), (1, 1)
pad = compute_samemode_pad(second_stage_side, second_stage_side, conv2_kernel, conv2_stride)
cnn_layer2 = CNNLayer.CNNLayer(pool_layer1, conv2_kernel + (8,), 16, pad, conv2_stride, "relu")

# Pool 2: "max" pooling; the same (4, 4) tuple is used for both size arguments.
pool2_size = (4, 4)
pad = compute_samemode_pad(second_stage_side, second_stage_side, pool2_size, pool2_size)
pool_layer2 = PoolingLayer.PoolingLayer(cnn_layer2, pool2_size, pad, "max", pool2_size)

cnn_pipe = CNNPipe.CNNPipe(cnn_layer1, pool_layer2)

# ---- Classifier head ----
# Linear layer from 64 values to 6, followed by a softmax activator.
class_count = 6
liner_layer = NNLinearLayer(None, 64, class_count)
outputLayer = NNActivator(liner_layer, 'softmax', class_count)
nn_pipe = NNPipe(liner_layer, outputLayer)

# ---- Training ----
# Only indices 0..9 along the last axis of X_train / Y_train are used.
model = CNNModel.CNNModel(cnn_pipe, nn_pipe)
first_ten = slice(0, 10)
model.fit(
    X_train[:, :, :, first_ten],
    Y_train[:, first_ten],
    learning_rate=0.01,
    iteration_count=6000,
    lambd=0,
)

0.30000000000000004
iteration1    cost:31.109584706294516
0.19999999999999996
iteration2    cost:35.55381109276515
0.19999999999999996
iteration3    cost:35.55381109276517
0.19999999999999996
iteration4    cost:2.7059962989438064
0.19999999999999996
iteration5    cost:2.7052182401330382
0.19999999999999996
iteration6    cost:2.704443463462341
0.19999999999999996
iteration7    cost:2.703671952437008
0.19999999999999996
iteration8    cost:2.702903690668769
0.19999999999999996
iteration9    cost:2.7021386618748293
0.19999999999999996
iteration10    cost:2.701376849876916
0.19999999999999996
iteration11    cost:2.700618238600347
0.19999999999999996


KeyboardInterrupt: 