## 使用两层全连接NN训练MNIST

In [2]:
import numpy as np
import os
import sys
sys.path.append('..')
from utils.common import *
from utils.layers import *
from dataset.mnist import load_mnist
from model.refactTLNN import TwoLayerNet

# Train a TwoLayerNet on MNIST with hand-written SGD updates and periodically
# report train/test loss and accuracy.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Seed NumPy's global RNG so the mini-batch sampling below is repeatable.
# NOTE(review): this also fixes the weight initialisation only if TwoLayerNet
# draws from np.random -- not visible from this notebook, confirm.
SEED = 0
np.random.seed(SEED)

network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

batch_size = 128
train_size = x_train.shape[0]
learning_rate = 1e-3
# Despite the name, this is the number of mini-batch updates: each pass of the
# loop below draws ONE batch of `batch_size` samples, not a full sweep of the data.
epoch_num = 50000
# Metrics are computed and printed once every `log_interval` iterations.
log_interval = 500

for epoch in range(epoch_num):
    # np.random.choice samples indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # The batch loss/accuracy are only ever printed, so compute them on logging
    # iterations only instead of paying two extra forward passes on every step.
    # They are still evaluated before the parameter update, as before.
    should_log = epoch % log_interval == 0
    if should_log:
        train_loss = network.loss(x_batch, t_batch)
        train_acc = network.accuracy(x_batch, t_batch)

    # NOTE(review): assumes gradient() does not depend on a preceding loss()
    # call on the same batch -- confirm against model/refactTLNN.py.
    grads = network.gradient(x_batch, t_batch)

    # Plain SGD step on the four parameter arrays.
    for key in ['W1', 'b1', 'W2', 'b2']:
        network.params[key] -= learning_rate * grads[key]

    if should_log:
        # Test metrics are measured on the whole test set, after the update.
        test_loss = network.loss(x_test, t_test)
        test_acc = network.accuracy(x_test, t_test)
        print("epoch: %4d" % epoch, "train_loss: %6f" % train_loss, "train_acc: %6f" % train_acc,
              "test_loss: %6f" % test_loss, "test_acc: %6f" % test_acc)

epoch:    0 train_loss: 2.302008 train_acc: 0.078125 test_loss: 2.301647 test_acc: 0.151300
epoch:  500 train_loss: 0.380743 train_acc: 0.921875 test_loss: 0.311462 test_acc: 0.912400
epoch: 1000 train_loss: 0.159850 train_acc: 0.929688 test_loss: 0.243995 test_acc: 0.931200
epoch: 1500 train_loss: 0.126069 train_acc: 0.968750 test_loss: 0.198286 test_acc: 0.943100
epoch: 2000 train_loss: 0.154540 train_acc: 0.960938 test_loss: 0.169979 test_acc: 0.951700
epoch: 2500 train_loss: 0.110905 train_acc: 0.968750 test_loss: 0.151994 test_acc: 0.956000
epoch: 3000 train_loss: 0.124815 train_acc: 0.953125 test_loss: 0.138702 test_acc: 0.959600
epoch: 3500 train_loss: 0.123375 train_acc: 0.968750 test_loss: 0.124530 test_acc: 0.963200
epoch: 4000 train_loss: 0.134042 train_acc: 0.992188 test_loss: 0.115076 test_acc: 0.966200
epoch: 4500 train_loss: 0.132115 train_acc: 0.953125 test_loss: 0.107203 test_acc: 0.967900
epoch: 5000 train_loss: 0.136444 train_acc: 0.960938 test_loss: 0.101230 test_ac

epoch: 44500 train_loss: 0.006436 train_acc: 1.000000 test_loss: 0.075941 test_acc: 0.979600
epoch: 45000 train_loss: 0.002474 train_acc: 1.000000 test_loss: 0.074689 test_acc: 0.979900
epoch: 45500 train_loss: 0.003029 train_acc: 1.000000 test_loss: 0.075433 test_acc: 0.979400
epoch: 46000 train_loss: 0.004018 train_acc: 1.000000 test_loss: 0.076593 test_acc: 0.979200
epoch: 46500 train_loss: 0.003818 train_acc: 1.000000 test_loss: 0.076151 test_acc: 0.979700
epoch: 47000 train_loss: 0.004112 train_acc: 1.000000 test_loss: 0.076587 test_acc: 0.980100
epoch: 47500 train_loss: 0.002736 train_acc: 1.000000 test_loss: 0.076557 test_acc: 0.979600
epoch: 48000 train_loss: 0.004514 train_acc: 1.000000 test_loss: 0.076693 test_acc: 0.979500
epoch: 48500 train_loss: 0.005322 train_acc: 1.000000 test_loss: 0.076489 test_acc: 0.980000
epoch: 49000 train_loss: 0.002355 train_acc: 1.000000 test_loss: 0.075927 test_acc: 0.979500
epoch: 49500 train_loss: 0.003085 train_acc: 1.000000 test_loss: 0.077

可以看到两层NN有些过拟合（train_acc 已达到 100%，而 test_acc 停在约 98%）。这里的 epoch 实际上是 mini-batch 迭代次数；迭代到一定次数后测试精度就达到极限、不再提升，可能需要 ReduceLROnPlateau 这类学习率衰减策略。test_acc 最好是 98.06%。
## 使用三层全连接NN训练MNIST

In [3]:
import numpy as np
import os
import sys
sys.path.append('..')
from utils.common import *
from utils.layers import *
from dataset.mnist import load_mnist
from model.threeLayerNetwork import ThreeLayerNet
from utils.optimizer import *

# Train a ThreeLayerNet on MNIST with the SGD optimizer and periodically
# report train/test loss and accuracy.
(x_train, t_train), (x_test, t_test) = load_mnist()

# Seed NumPy's global RNG so the mini-batch sampling below is repeatable.
# NOTE(review): this also fixes the weight initialisation only if ThreeLayerNet
# draws from np.random -- not visible from this notebook, confirm.
SEED = 0
np.random.seed(SEED)

network = ThreeLayerNet(input_size=28*28, hidden_size=100, output_size=10)

batch_size = 256
train_size = x_train.shape[0]
learning_rate = 1e-3
# Despite the name, this is the number of mini-batch updates: each pass of the
# loop below draws ONE batch of `batch_size` samples, not a full sweep of the data.
epoch_num = 50000
# Metrics are computed and printed once every `log_interval` iterations.
log_interval = 500

optimizer = SGD(lr=learning_rate)

for epoch in range(epoch_num):
    # np.random.choice samples indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # The batch loss/accuracy are only ever printed, so compute them on logging
    # iterations only instead of paying two extra forward passes on every step.
    # They are still evaluated before the parameter update, as before.
    should_log = epoch % log_interval == 0
    if should_log:
        train_loss = network.loss(x_batch, t_batch)
        train_acc = network.accuracy(x_batch, t_batch)

    # NOTE(review): assumes gradient() does not depend on a preceding loss()
    # call on the same batch -- confirm against model/threeLayerNetwork.py.
    grads = network.gradient(x_batch, t_batch)

    optimizer.update(network.params, grads)

    if should_log:
        # Test metrics are measured on the whole test set, after the update.
        test_loss = network.loss(x_test, t_test)
        test_acc = network.accuracy(x_test, t_test)
        print("epoch: %4d" % epoch, "train_loss: %6f" % train_loss, "train_acc: %6f" % train_acc,
              "test_loss: %6f" % test_loss, "test_acc: %6f" % test_acc)

epoch:    0 train_loss: 2.631788 train_acc: 0.125000 test_loss: 2.482315 test_acc: 0.098200
epoch:  500 train_loss: 0.243812 train_acc: 0.914062 test_loss: 0.269849 test_acc: 0.919800
epoch: 1000 train_loss: 0.184579 train_acc: 0.945312 test_loss: 0.170481 test_acc: 0.947200
epoch: 1500 train_loss: 0.097301 train_acc: 0.968750 test_loss: 0.129277 test_acc: 0.961500
epoch: 2000 train_loss: 0.132739 train_acc: 0.957031 test_loss: 0.121635 test_acc: 0.962600
epoch: 2500 train_loss: 0.078572 train_acc: 0.976562 test_loss: 0.101411 test_acc: 0.970000
epoch: 3000 train_loss: 0.056513 train_acc: 0.976562 test_loss: 0.101596 test_acc: 0.970000
epoch: 3500 train_loss: 0.067490 train_acc: 0.980469 test_loss: 0.098973 test_acc: 0.970600
epoch: 4000 train_loss: 0.013891 train_acc: 1.000000 test_loss: 0.090862 test_acc: 0.972300
epoch: 4500 train_loss: 0.021185 train_acc: 0.992188 test_loss: 0.091847 test_acc: 0.973600
epoch: 5000 train_loss: 0.034371 train_acc: 0.996094 test_loss: 0.094126 test_ac

epoch: 44500 train_loss: 0.000120 train_acc: 1.000000 test_loss: 0.152823 test_acc: 0.974700
epoch: 45000 train_loss: 0.000163 train_acc: 1.000000 test_loss: 0.152907 test_acc: 0.974300
epoch: 45500 train_loss: 0.000073 train_acc: 1.000000 test_loss: 0.153402 test_acc: 0.974600
epoch: 46000 train_loss: 0.000053 train_acc: 1.000000 test_loss: 0.153908 test_acc: 0.974800
epoch: 46500 train_loss: 0.000128 train_acc: 1.000000 test_loss: 0.153937 test_acc: 0.974700
epoch: 47000 train_loss: 0.000059 train_acc: 1.000000 test_loss: 0.154297 test_acc: 0.974600
epoch: 47500 train_loss: 0.000219 train_acc: 1.000000 test_loss: 0.153898 test_acc: 0.975000
epoch: 48000 train_loss: 0.000097 train_acc: 1.000000 test_loss: 0.154130 test_acc: 0.975000
epoch: 48500 train_loss: 0.000145 train_acc: 1.000000 test_loss: 0.154496 test_acc: 0.974900
epoch: 49000 train_loss: 0.000119 train_acc: 1.000000 test_loss: 0.154606 test_acc: 0.974600
epoch: 49500 train_loss: 0.000209 train_acc: 1.000000 test_loss: 0.155

效果并没有改善太多：test_acc 停在约 97.5%，而 test_loss 从约 0.09 上升到约 0.155。可能是因为 MNIST 太过于简单，两层足够了，三层反而更容易过拟合。
## 使用两层全连接NN训练CIFAR10

In [6]:
import numpy as np
import os
import sys
sys.path.append('..')
from utils.common import *
from utils.layers import *
from dataset.cifar10 import load_cifar10
from utils.optimizer import *
from model.refactTLNN import TwoLayerNet

# Train a TwoLayerNet on CIFAR10 with the AdaGrad optimizer and periodically
# report train/test loss and accuracy.
(x_train, t_train), (x_test, t_test) = load_cifar10()

# Seed NumPy's global RNG so the mini-batch sampling below is repeatable.
# NOTE(review): this also fixes the weight initialisation only if TwoLayerNet
# draws from np.random -- not visible from this notebook, confirm.
SEED = 0
np.random.seed(SEED)

network = TwoLayerNet(input_size=3*32*32, hidden_size=100, output_size=10)

batch_size = 256
train_size = x_train.shape[0]
learning_rate = 1e-3
# Despite the name, this is the number of mini-batch updates: each pass of the
# loop below draws ONE batch of `batch_size` samples, not a full sweep of the data.
epoch_num = int(1e5)
# Author's note: this network is too shallow -- by the time the iteration count
# reaches about 20000 the accuracy no longer improves (50%).
# Metrics are computed and printed once every `log_interval` iterations.
log_interval = 50

# Alternative optimizers, left commented out:
# optimizer = SGD(lr=learning_rate)
# optimizer = Momemtum(lr=learning_rate)
optimizer = AdaGrad(lr=learning_rate)

for epoch in range(epoch_num):
    # np.random.choice samples indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # The batch loss/accuracy are only ever printed, so compute them on logging
    # iterations only instead of paying two extra forward passes on every step.
    # They are still evaluated before the parameter update, as before.
    should_log = epoch % log_interval == 0
    if should_log:
        train_loss = network.loss(x_batch, t_batch)
        train_acc = network.accuracy(x_batch, t_batch)

    # NOTE(review): assumes gradient() does not depend on a preceding loss()
    # call on the same batch -- confirm against model/refactTLNN.py.
    grads = network.gradient(x_batch, t_batch)

    optimizer.update(network.params, grads)

    if should_log:
        # Test metrics are measured on the whole test set, after the update.
        test_loss = network.loss(x_test, t_test)
        test_acc = network.accuracy(x_test, t_test)
        print("epoch: %4d" % epoch, "train_loss: %6f" % train_loss, "train_acc: %6f" % train_acc,
              "test_loss: %6f" % test_loss, "test_acc: %6f" % test_acc)

Downloading cifar10...
Done
epoch:    0 train_loss: 2.305836 train_acc: 0.093750 test_loss: 2.302427 test_acc: 0.100600
epoch:   50 train_loss: 2.163131 train_acc: 0.183594 test_loss: 2.124208 test_acc: 0.256100
epoch:  100 train_loss: 2.027387 train_acc: 0.269531 test_loss: 2.058223 test_acc: 0.275800
epoch:  150 train_loss: 2.014522 train_acc: 0.269531 test_loss: 2.016409 test_acc: 0.285900
epoch:  200 train_loss: 1.964637 train_acc: 0.296875 test_loss: 1.990987 test_acc: 0.295500
epoch:  250 train_loss: 1.994704 train_acc: 0.324219 test_loss: 1.974297 test_acc: 0.299900
epoch:  300 train_loss: 1.994738 train_acc: 0.281250 test_loss: 1.961776 test_acc: 0.304600
epoch:  350 train_loss: 1.877233 train_acc: 0.394531 test_loss: 1.949677 test_acc: 0.310600
epoch:  400 train_loss: 1.899472 train_acc: 0.343750 test_loss: 1.939001 test_acc: 0.314000
epoch:  450 train_loss: 1.882848 train_acc: 0.378906 test_loss: 1.929881 test_acc: 0.310700
epoch:  500 train_loss: 1.932587 train_acc: 0.324219

epoch: 4450 train_loss: 1.700308 train_acc: 0.386719 test_loss: 1.755284 test_acc: 0.386800
epoch: 4500 train_loss: 1.767650 train_acc: 0.378906 test_loss: 1.754397 test_acc: 0.385700
epoch: 4550 train_loss: 1.774752 train_acc: 0.367188 test_loss: 1.754043 test_acc: 0.385900
epoch: 4600 train_loss: 1.762010 train_acc: 0.402344 test_loss: 1.752788 test_acc: 0.386300
epoch: 4650 train_loss: 1.701988 train_acc: 0.398438 test_loss: 1.750917 test_acc: 0.387200
epoch: 4700 train_loss: 1.758365 train_acc: 0.378906 test_loss: 1.750246 test_acc: 0.385100
epoch: 4750 train_loss: 1.641484 train_acc: 0.460938 test_loss: 1.748967 test_acc: 0.388500
epoch: 4800 train_loss: 1.795086 train_acc: 0.351562 test_loss: 1.748325 test_acc: 0.388500
epoch: 4850 train_loss: 1.778810 train_acc: 0.363281 test_loss: 1.747434 test_acc: 0.388000
epoch: 4900 train_loss: 1.827456 train_acc: 0.367188 test_loss: 1.746482 test_acc: 0.387600
epoch: 4950 train_loss: 1.703088 train_acc: 0.382812 test_loss: 1.745349 test_ac

epoch: 8950 train_loss: 1.679559 train_acc: 0.410156 test_loss: 1.691042 test_acc: 0.412300
epoch: 9000 train_loss: 1.675223 train_acc: 0.410156 test_loss: 1.690539 test_acc: 0.410200
epoch: 9050 train_loss: 1.709469 train_acc: 0.417969 test_loss: 1.690127 test_acc: 0.412200
epoch: 9100 train_loss: 1.686625 train_acc: 0.437500 test_loss: 1.689146 test_acc: 0.411800
epoch: 9150 train_loss: 1.655619 train_acc: 0.437500 test_loss: 1.689341 test_acc: 0.412200
epoch: 9200 train_loss: 1.751075 train_acc: 0.390625 test_loss: 1.688159 test_acc: 0.412000
epoch: 9250 train_loss: 1.618604 train_acc: 0.441406 test_loss: 1.688056 test_acc: 0.412400
epoch: 9300 train_loss: 1.654588 train_acc: 0.414062 test_loss: 1.687514 test_acc: 0.413600
epoch: 9350 train_loss: 1.714017 train_acc: 0.355469 test_loss: 1.687169 test_acc: 0.411600
epoch: 9400 train_loss: 1.588065 train_acc: 0.460938 test_loss: 1.686874 test_acc: 0.411500
epoch: 9450 train_loss: 1.745008 train_acc: 0.421875 test_loss: 1.685821 test_ac

epoch: 13400 train_loss: 1.567130 train_acc: 0.445312 test_loss: 1.656010 test_acc: 0.424500
epoch: 13450 train_loss: 1.574513 train_acc: 0.468750 test_loss: 1.655914 test_acc: 0.422000
epoch: 13500 train_loss: 1.694485 train_acc: 0.406250 test_loss: 1.655490 test_acc: 0.423500
epoch: 13550 train_loss: 1.721380 train_acc: 0.457031 test_loss: 1.655168 test_acc: 0.424600
epoch: 13600 train_loss: 1.629202 train_acc: 0.453125 test_loss: 1.654840 test_acc: 0.423200
epoch: 13650 train_loss: 1.660307 train_acc: 0.433594 test_loss: 1.654999 test_acc: 0.423300
epoch: 13700 train_loss: 1.736534 train_acc: 0.382812 test_loss: 1.654258 test_acc: 0.424300
epoch: 13750 train_loss: 1.671042 train_acc: 0.406250 test_loss: 1.654186 test_acc: 0.424400
epoch: 13800 train_loss: 1.594397 train_acc: 0.421875 test_loss: 1.653559 test_acc: 0.424800
epoch: 13850 train_loss: 1.600287 train_acc: 0.460938 test_loss: 1.653557 test_acc: 0.423000
epoch: 13900 train_loss: 1.647956 train_acc: 0.441406 test_loss: 1.653

epoch: 17850 train_loss: 1.632125 train_acc: 0.425781 test_loss: 1.633157 test_acc: 0.433000
epoch: 17900 train_loss: 1.591240 train_acc: 0.453125 test_loss: 1.632889 test_acc: 0.433400
epoch: 17950 train_loss: 1.699142 train_acc: 0.378906 test_loss: 1.632857 test_acc: 0.434300
epoch: 18000 train_loss: 1.570802 train_acc: 0.417969 test_loss: 1.632341 test_acc: 0.433900
epoch: 18050 train_loss: 1.635924 train_acc: 0.445312 test_loss: 1.632514 test_acc: 0.432600
epoch: 18100 train_loss: 1.718316 train_acc: 0.390625 test_loss: 1.632512 test_acc: 0.434400
epoch: 18150 train_loss: 1.657607 train_acc: 0.398438 test_loss: 1.631703 test_acc: 0.434600
epoch: 18200 train_loss: 1.618679 train_acc: 0.437500 test_loss: 1.631651 test_acc: 0.433800
epoch: 18250 train_loss: 1.585635 train_acc: 0.433594 test_loss: 1.631568 test_acc: 0.433000
epoch: 18300 train_loss: 1.614987 train_acc: 0.460938 test_loss: 1.631096 test_acc: 0.432500
epoch: 18350 train_loss: 1.655076 train_acc: 0.414062 test_loss: 1.630

epoch: 22300 train_loss: 1.574738 train_acc: 0.464844 test_loss: 1.617068 test_acc: 0.436700
epoch: 22350 train_loss: 1.705302 train_acc: 0.394531 test_loss: 1.616276 test_acc: 0.437900
epoch: 22400 train_loss: 1.587909 train_acc: 0.472656 test_loss: 1.616363 test_acc: 0.438000
epoch: 22450 train_loss: 1.634529 train_acc: 0.402344 test_loss: 1.616669 test_acc: 0.439400
epoch: 22500 train_loss: 1.636263 train_acc: 0.429688 test_loss: 1.616338 test_acc: 0.438100
epoch: 22550 train_loss: 1.607531 train_acc: 0.445312 test_loss: 1.616029 test_acc: 0.437500
epoch: 22600 train_loss: 1.650632 train_acc: 0.429688 test_loss: 1.615733 test_acc: 0.440300
epoch: 22650 train_loss: 1.506352 train_acc: 0.460938 test_loss: 1.615947 test_acc: 0.438900
epoch: 22700 train_loss: 1.661629 train_acc: 0.425781 test_loss: 1.615847 test_acc: 0.438900
epoch: 22750 train_loss: 1.693542 train_acc: 0.390625 test_loss: 1.615571 test_acc: 0.440600
epoch: 22800 train_loss: 1.659831 train_acc: 0.425781 test_loss: 1.615

epoch: 26750 train_loss: 1.569792 train_acc: 0.492188 test_loss: 1.603893 test_acc: 0.442400
epoch: 26800 train_loss: 1.438789 train_acc: 0.523438 test_loss: 1.604082 test_acc: 0.443000
epoch: 26850 train_loss: 1.620169 train_acc: 0.441406 test_loss: 1.603851 test_acc: 0.442800
epoch: 26900 train_loss: 1.492963 train_acc: 0.484375 test_loss: 1.603760 test_acc: 0.443400
epoch: 26950 train_loss: 1.626151 train_acc: 0.464844 test_loss: 1.603411 test_acc: 0.442400
epoch: 27000 train_loss: 1.689426 train_acc: 0.429688 test_loss: 1.603530 test_acc: 0.442500
epoch: 27050 train_loss: 1.574120 train_acc: 0.449219 test_loss: 1.603657 test_acc: 0.443600
epoch: 27100 train_loss: 1.650575 train_acc: 0.449219 test_loss: 1.602951 test_acc: 0.443500
epoch: 27150 train_loss: 1.645398 train_acc: 0.417969 test_loss: 1.603221 test_acc: 0.441900
epoch: 27200 train_loss: 1.565733 train_acc: 0.480469 test_loss: 1.603341 test_acc: 0.441700
epoch: 27250 train_loss: 1.690219 train_acc: 0.390625 test_loss: 1.602

epoch: 31200 train_loss: 1.499436 train_acc: 0.472656 test_loss: 1.593998 test_acc: 0.446500
epoch: 31250 train_loss: 1.669086 train_acc: 0.410156 test_loss: 1.593707 test_acc: 0.446400
epoch: 31300 train_loss: 1.621290 train_acc: 0.457031 test_loss: 1.593508 test_acc: 0.445000
epoch: 31350 train_loss: 1.449872 train_acc: 0.500000 test_loss: 1.593274 test_acc: 0.445200
epoch: 31400 train_loss: 1.486946 train_acc: 0.500000 test_loss: 1.593300 test_acc: 0.447900
epoch: 31450 train_loss: 1.577464 train_acc: 0.441406 test_loss: 1.593037 test_acc: 0.445900
epoch: 31500 train_loss: 1.641806 train_acc: 0.429688 test_loss: 1.592943 test_acc: 0.446600
epoch: 31550 train_loss: 1.587834 train_acc: 0.484375 test_loss: 1.593316 test_acc: 0.446600
epoch: 31600 train_loss: 1.644062 train_acc: 0.421875 test_loss: 1.592699 test_acc: 0.445200
epoch: 31650 train_loss: 1.571916 train_acc: 0.437500 test_loss: 1.593409 test_acc: 0.444700
epoch: 31700 train_loss: 1.503304 train_acc: 0.507812 test_loss: 1.593

epoch: 35650 train_loss: 1.639037 train_acc: 0.417969 test_loss: 1.585496 test_acc: 0.448800
epoch: 35700 train_loss: 1.686240 train_acc: 0.429688 test_loss: 1.585256 test_acc: 0.449500
epoch: 35750 train_loss: 1.542631 train_acc: 0.472656 test_loss: 1.585638 test_acc: 0.448900
epoch: 35800 train_loss: 1.597316 train_acc: 0.429688 test_loss: 1.585171 test_acc: 0.448900
epoch: 35850 train_loss: 1.469882 train_acc: 0.492188 test_loss: 1.585017 test_acc: 0.448900
epoch: 35900 train_loss: 1.609920 train_acc: 0.441406 test_loss: 1.585211 test_acc: 0.449200
epoch: 35950 train_loss: 1.466077 train_acc: 0.445312 test_loss: 1.585037 test_acc: 0.449800
epoch: 36000 train_loss: 1.586988 train_acc: 0.449219 test_loss: 1.585142 test_acc: 0.449700
epoch: 36050 train_loss: 1.502786 train_acc: 0.527344 test_loss: 1.584829 test_acc: 0.449800
epoch: 36100 train_loss: 1.535332 train_acc: 0.464844 test_loss: 1.584869 test_acc: 0.448400
epoch: 36150 train_loss: 1.562063 train_acc: 0.441406 test_loss: 1.584

epoch: 40100 train_loss: 1.458024 train_acc: 0.503906 test_loss: 1.577893 test_acc: 0.450700
epoch: 40150 train_loss: 1.495165 train_acc: 0.531250 test_loss: 1.577795 test_acc: 0.451600
epoch: 40200 train_loss: 1.586784 train_acc: 0.445312 test_loss: 1.577744 test_acc: 0.451700
epoch: 40250 train_loss: 1.518311 train_acc: 0.484375 test_loss: 1.577990 test_acc: 0.451600
epoch: 40300 train_loss: 1.577133 train_acc: 0.460938 test_loss: 1.577699 test_acc: 0.452100
epoch: 40350 train_loss: 1.642484 train_acc: 0.429688 test_loss: 1.577621 test_acc: 0.451200
epoch: 40400 train_loss: 1.462679 train_acc: 0.484375 test_loss: 1.577474 test_acc: 0.451600
epoch: 40450 train_loss: 1.590048 train_acc: 0.449219 test_loss: 1.577463 test_acc: 0.450400
epoch: 40500 train_loss: 1.415659 train_acc: 0.531250 test_loss: 1.577899 test_acc: 0.450800
epoch: 40550 train_loss: 1.650156 train_acc: 0.441406 test_loss: 1.577366 test_acc: 0.449800
epoch: 40600 train_loss: 1.616305 train_acc: 0.394531 test_loss: 1.577

epoch: 44550 train_loss: 1.624876 train_acc: 0.441406 test_loss: 1.571925 test_acc: 0.451500
epoch: 44600 train_loss: 1.474690 train_acc: 0.484375 test_loss: 1.571619 test_acc: 0.451800
epoch: 44650 train_loss: 1.657760 train_acc: 0.429688 test_loss: 1.571679 test_acc: 0.450500
epoch: 44700 train_loss: 1.587948 train_acc: 0.441406 test_loss: 1.571609 test_acc: 0.450000
epoch: 44750 train_loss: 1.584913 train_acc: 0.429688 test_loss: 1.571531 test_acc: 0.451500
epoch: 44800 train_loss: 1.524976 train_acc: 0.449219 test_loss: 1.571451 test_acc: 0.453400
epoch: 44850 train_loss: 1.500177 train_acc: 0.500000 test_loss: 1.571823 test_acc: 0.451300
epoch: 44900 train_loss: 1.509981 train_acc: 0.437500 test_loss: 1.572219 test_acc: 0.449700
epoch: 44950 train_loss: 1.479001 train_acc: 0.460938 test_loss: 1.571580 test_acc: 0.451700
epoch: 45000 train_loss: 1.632749 train_acc: 0.468750 test_loss: 1.571595 test_acc: 0.451300
epoch: 45050 train_loss: 1.558984 train_acc: 0.449219 test_loss: 1.571

epoch: 49000 train_loss: 1.496029 train_acc: 0.492188 test_loss: 1.566147 test_acc: 0.453400
epoch: 49050 train_loss: 1.568941 train_acc: 0.425781 test_loss: 1.565699 test_acc: 0.453500
epoch: 49100 train_loss: 1.546121 train_acc: 0.476562 test_loss: 1.566006 test_acc: 0.452000
epoch: 49150 train_loss: 1.478186 train_acc: 0.457031 test_loss: 1.566239 test_acc: 0.452900
epoch: 49200 train_loss: 1.636631 train_acc: 0.433594 test_loss: 1.566047 test_acc: 0.452000
epoch: 49250 train_loss: 1.620348 train_acc: 0.390625 test_loss: 1.565816 test_acc: 0.452800
epoch: 49300 train_loss: 1.578254 train_acc: 0.460938 test_loss: 1.565764 test_acc: 0.451900
epoch: 49350 train_loss: 1.508459 train_acc: 0.449219 test_loss: 1.565552 test_acc: 0.452800
epoch: 49400 train_loss: 1.555803 train_acc: 0.453125 test_loss: 1.565270 test_acc: 0.455400
epoch: 49450 train_loss: 1.563952 train_acc: 0.457031 test_loss: 1.565340 test_acc: 0.453700
epoch: 49500 train_loss: 1.616814 train_acc: 0.441406 test_loss: 1.565

KeyboardInterrupt: 

## 使用三层全连接NN训练CIFAR10

In [9]:
import numpy as np
import os
import sys
sys.path.append('..')
from utils.common import *
from utils.layers import *
from dataset.cifar10 import load_cifar10
from utils.optimizer import *
from model.threeLayerNetwork import ThreeLayerNet

# Train a ThreeLayerNet on CIFAR10 with the SGD optimizer and periodically
# report train/test loss and accuracy.
(x_train, t_train), (x_test, t_test) = load_cifar10()

# Seed NumPy's global RNG so the mini-batch sampling below is repeatable.
# NOTE(review): this also fixes the weight initialisation only if ThreeLayerNet
# draws from np.random -- not visible from this notebook, confirm.
SEED = 0
np.random.seed(SEED)

network = ThreeLayerNet(input_size=3*32*32, hidden_size=300, output_size=10)

batch_size = 128
train_size = x_train.shape[0]
learning_rate = 1e-3
# Despite the name, this is the number of mini-batch updates: each pass of the
# loop below draws ONE batch of `batch_size` samples, not a full sweep of the data.
epoch_num = 100000
# Metrics are computed and printed once every `log_interval` iterations.
log_interval = 100

optimizer = SGD(lr=learning_rate)

for epoch in range(epoch_num):
    # np.random.choice samples indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # The batch loss/accuracy are only ever printed, so compute them on logging
    # iterations only instead of paying two extra forward passes on every step.
    # They are still evaluated before the parameter update, as before.
    should_log = epoch % log_interval == 0
    if should_log:
        train_loss = network.loss(x_batch, t_batch)
        train_acc = network.accuracy(x_batch, t_batch)

    # NOTE(review): assumes gradient() does not depend on a preceding loss()
    # call on the same batch -- confirm against model/threeLayerNetwork.py.
    grads = network.gradient(x_batch, t_batch)

    optimizer.update(network.params, grads)

    if should_log:
        # Test metrics are measured on the whole test set, after the update.
        test_loss = network.loss(x_test, t_test)
        test_acc = network.accuracy(x_test, t_test)
        print("epoch: %4d" % epoch, "train_loss: %6f" % train_loss, "train_acc: %6f" % train_acc,
              "test_loss: %6f" % test_loss, "test_acc: %6f" % test_acc)

epoch:    0 train_loss: 2.704600 train_acc: 0.140625 test_loss: 2.705148 test_acc: 0.100000
epoch:  100 train_loss: 2.183924 train_acc: 0.156250 test_loss: 2.306936 test_acc: 0.146400
epoch:  200 train_loss: 2.200534 train_acc: 0.171875 test_loss: 2.021915 test_acc: 0.266500
epoch:  300 train_loss: 2.096825 train_acc: 0.242188 test_loss: 2.047453 test_acc: 0.249100
epoch:  400 train_loss: 1.929038 train_acc: 0.312500 test_loss: 1.920862 test_acc: 0.296700
epoch:  500 train_loss: 2.038799 train_acc: 0.226562 test_loss: 1.859620 test_acc: 0.314700
epoch:  600 train_loss: 1.844721 train_acc: 0.289062 test_loss: 1.930973 test_acc: 0.302500
epoch:  700 train_loss: 1.800556 train_acc: 0.343750 test_loss: 1.772835 test_acc: 0.362000
epoch:  800 train_loss: 1.766768 train_acc: 0.335938 test_loss: 1.791984 test_acc: 0.347300
epoch:  900 train_loss: 2.127633 train_acc: 0.250000 test_loss: 2.049821 test_acc: 0.285700
epoch: 1000 train_loss: 1.758553 train_acc: 0.367188 test_loss: 1.739638 test_ac

epoch: 9000 train_loss: 1.136730 train_acc: 0.593750 test_loss: 1.381882 test_acc: 0.513200
epoch: 9100 train_loss: 1.266407 train_acc: 0.531250 test_loss: 1.414052 test_acc: 0.507800
epoch: 9200 train_loss: 0.961415 train_acc: 0.679688 test_loss: 1.374960 test_acc: 0.520500
epoch: 9300 train_loss: 1.241554 train_acc: 0.562500 test_loss: 1.426635 test_acc: 0.497800
epoch: 9400 train_loss: 1.190736 train_acc: 0.531250 test_loss: 1.366529 test_acc: 0.514400
epoch: 9500 train_loss: 1.280435 train_acc: 0.515625 test_loss: 1.437775 test_acc: 0.495400
epoch: 9600 train_loss: 1.292292 train_acc: 0.546875 test_loss: 1.388301 test_acc: 0.513500
epoch: 9700 train_loss: 1.018319 train_acc: 0.632812 test_loss: 1.382408 test_acc: 0.519300
epoch: 9800 train_loss: 1.222544 train_acc: 0.593750 test_loss: 1.356523 test_acc: 0.512600
epoch: 9900 train_loss: 1.177325 train_acc: 0.562500 test_loss: 1.377171 test_acc: 0.520300
epoch: 10000 train_loss: 1.041572 train_acc: 0.601562 test_loss: 1.421594 test_a

epoch: 17900 train_loss: 0.985592 train_acc: 0.632812 test_loss: 1.544901 test_acc: 0.509200
epoch: 18000 train_loss: 0.906674 train_acc: 0.656250 test_loss: 1.515752 test_acc: 0.520500
epoch: 18100 train_loss: 0.843115 train_acc: 0.703125 test_loss: 1.550784 test_acc: 0.521000
epoch: 18200 train_loss: 0.760574 train_acc: 0.703125 test_loss: 1.550037 test_acc: 0.512900
epoch: 18300 train_loss: 0.880436 train_acc: 0.718750 test_loss: 1.480081 test_acc: 0.526800
epoch: 18400 train_loss: 0.858105 train_acc: 0.726562 test_loss: 1.482806 test_acc: 0.534300
epoch: 18500 train_loss: 0.745694 train_acc: 0.757812 test_loss: 1.514650 test_acc: 0.529600
epoch: 18600 train_loss: 0.813505 train_acc: 0.710938 test_loss: 1.533935 test_acc: 0.518400
epoch: 18700 train_loss: 0.877442 train_acc: 0.671875 test_loss: 1.522464 test_acc: 0.526500
epoch: 18800 train_loss: 0.673379 train_acc: 0.804688 test_loss: 1.530445 test_acc: 0.519700
epoch: 18900 train_loss: 0.870660 train_acc: 0.648438 test_loss: 1.561

epoch: 26800 train_loss: 0.820123 train_acc: 0.718750 test_loss: 1.810910 test_acc: 0.518300
epoch: 26900 train_loss: 0.620975 train_acc: 0.812500 test_loss: 1.874012 test_acc: 0.510000
epoch: 27000 train_loss: 0.676682 train_acc: 0.710938 test_loss: 1.924084 test_acc: 0.497500
epoch: 27100 train_loss: 0.545090 train_acc: 0.789062 test_loss: 1.818956 test_acc: 0.515700
epoch: 27200 train_loss: 0.536829 train_acc: 0.789062 test_loss: 1.831334 test_acc: 0.516000
epoch: 27300 train_loss: 0.650015 train_acc: 0.781250 test_loss: 1.826655 test_acc: 0.515800
epoch: 27400 train_loss: 0.628867 train_acc: 0.742188 test_loss: 1.970955 test_acc: 0.506700
epoch: 27500 train_loss: 0.452319 train_acc: 0.843750 test_loss: 1.885491 test_acc: 0.519400
epoch: 27600 train_loss: 0.419426 train_acc: 0.859375 test_loss: 1.859821 test_acc: 0.515500
epoch: 27700 train_loss: 0.483524 train_acc: 0.828125 test_loss: 1.867845 test_acc: 0.514500
epoch: 27800 train_loss: 0.506927 train_acc: 0.796875 test_loss: 1.845

epoch: 35700 train_loss: 0.382897 train_acc: 0.882812 test_loss: 2.270108 test_acc: 0.509500
epoch: 35800 train_loss: 0.409866 train_acc: 0.828125 test_loss: 2.238593 test_acc: 0.512900
epoch: 35900 train_loss: 0.448314 train_acc: 0.828125 test_loss: 2.242247 test_acc: 0.521300
epoch: 36000 train_loss: 0.513117 train_acc: 0.796875 test_loss: 2.335678 test_acc: 0.503900
epoch: 36100 train_loss: 0.611561 train_acc: 0.796875 test_loss: 2.285781 test_acc: 0.506400
epoch: 36200 train_loss: 0.469836 train_acc: 0.828125 test_loss: 2.248836 test_acc: 0.513500
epoch: 36300 train_loss: 0.443955 train_acc: 0.828125 test_loss: 2.376142 test_acc: 0.510100
epoch: 36400 train_loss: 0.293063 train_acc: 0.890625 test_loss: 2.350510 test_acc: 0.511900
epoch: 36500 train_loss: 0.327317 train_acc: 0.890625 test_loss: 2.274827 test_acc: 0.511500
epoch: 36600 train_loss: 0.477030 train_acc: 0.773438 test_loss: 2.301455 test_acc: 0.517800
epoch: 36700 train_loss: 0.473443 train_acc: 0.843750 test_loss: 2.354

KeyboardInterrupt: 

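三层全连接NN轻松就超过了两层（test_acc 约 52% 对约 45%）。因为 CIFAR10 数据复杂，不那么容易过拟合，两层网络反而存在欠拟合、训练不足的问题；但是三层网络已经能看出过拟合的迹象了：train_loss 持续下降，而 test_loss 从约 1.4 上升到约 2.3。需要注意，两次实验的 hidden_size、batch_size 和优化器也不相同，所以差距不完全来自层数。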
## 使用五层全连接NN训练CIFAR10

In [12]:
import numpy as np
from dataset.cifar10 import load_cifar10
from utils.optimizer import *
from model.fiveLayerNetwork import FiveLayerNet

# Train a FiveLayerNet on CIFAR10 with the SGD optimizer and periodically
# report train/test loss and accuracy.
(x_train, t_train), (x_test, t_test) = load_cifar10()

# Seed NumPy's global RNG so the mini-batch sampling below is repeatable.
# NOTE(review): this also fixes the weight initialisation only if FiveLayerNet
# draws from np.random -- not visible from this notebook, confirm.
SEED = 0
np.random.seed(SEED)

network = FiveLayerNet(input_size=3*32*32, hidden_size=200, output_size=10)

batch_size = 128
train_size = x_train.shape[0]
learning_rate = 1e-3
# Despite the name, this is the number of mini-batch updates: each pass of the
# loop below draws ONE batch of `batch_size` samples, not a full sweep of the data.
epoch_num = 80000
# Metrics are computed and printed once every `log_interval` iterations.
log_interval = 100

optimizer = SGD(lr=learning_rate)

for epoch in range(epoch_num):
    # np.random.choice samples indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # The batch loss/accuracy are only ever printed, so compute them on logging
    # iterations only instead of paying two extra forward passes on every step.
    # They are still evaluated before the parameter update, as before.
    should_log = epoch % log_interval == 0
    if should_log:
        train_loss = network.loss(x_batch, t_batch)
        train_acc = network.accuracy(x_batch, t_batch)

    # NOTE(review): assumes gradient() does not depend on a preceding loss()
    # call on the same batch, and that the forward pass keeps no state between
    # calls -- confirm against model/fiveLayerNetwork.py.
    grads = network.gradient(x_batch, t_batch)

    optimizer.update(network.params, grads)

    if should_log:
        # Test metrics are measured on the whole test set, after the update.
        test_loss = network.loss(x_test, t_test)
        test_acc = network.accuracy(x_test, t_test)
        print("epoch: %4d" % epoch, "train_loss: %6f" % train_loss, "train_acc: %6f" % train_acc,
              "test_loss: %6f" % test_loss, "test_acc: %6f" % test_acc)

epoch:    0 train_loss: 2.356606 train_acc: 0.109375 test_loss: 2.342095 test_acc: 0.100000
epoch:  100 train_loss: 2.318498 train_acc: 0.070312 test_loss: 2.305879 test_acc: 0.100000
epoch:  200 train_loss: 2.302928 train_acc: 0.171875 test_loss: 2.307600 test_acc: 0.100000
epoch:  300 train_loss: 2.318450 train_acc: 0.085938 test_loss: 2.305752 test_acc: 0.100000
epoch:  400 train_loss: 2.304332 train_acc: 0.101562 test_loss: 2.303531 test_acc: 0.100000
epoch:  500 train_loss: 2.304158 train_acc: 0.085938 test_loss: 2.303223 test_acc: 0.100000
epoch:  600 train_loss: 2.305541 train_acc: 0.085938 test_loss: 2.303331 test_acc: 0.100000
epoch:  700 train_loss: 2.305713 train_acc: 0.109375 test_loss: 2.303781 test_acc: 0.100000
epoch:  800 train_loss: 2.305458 train_acc: 0.093750 test_loss: 2.303180 test_acc: 0.100000
epoch:  900 train_loss: 2.301589 train_acc: 0.093750 test_loss: 2.301762 test_acc: 0.099900
epoch: 1000 train_loss: 2.296131 train_acc: 0.101562 test_loss: 2.299602 test_ac

epoch: 9000 train_loss: 1.394955 train_acc: 0.468750 test_loss: 1.459845 test_acc: 0.478000
epoch: 9100 train_loss: 1.580997 train_acc: 0.421875 test_loss: 1.476254 test_acc: 0.463700
epoch: 9200 train_loss: 1.374063 train_acc: 0.492188 test_loss: 1.420656 test_acc: 0.491700
epoch: 9300 train_loss: 1.427221 train_acc: 0.429688 test_loss: 1.459515 test_acc: 0.477800
epoch: 9400 train_loss: 1.258328 train_acc: 0.570312 test_loss: 1.456307 test_acc: 0.473100
epoch: 9500 train_loss: 1.419414 train_acc: 0.484375 test_loss: 1.432838 test_acc: 0.485500
epoch: 9600 train_loss: 1.423971 train_acc: 0.507812 test_loss: 1.570189 test_acc: 0.453300
epoch: 9700 train_loss: 1.331346 train_acc: 0.484375 test_loss: 1.439612 test_acc: 0.480400
epoch: 9800 train_loss: 1.564790 train_acc: 0.453125 test_loss: 1.518086 test_acc: 0.460800
epoch: 9900 train_loss: 1.319953 train_acc: 0.453125 test_loss: 1.467587 test_acc: 0.479000
epoch: 10000 train_loss: 1.321030 train_acc: 0.500000 test_loss: 1.457034 test_a

epoch: 17900 train_loss: 1.048030 train_acc: 0.664062 test_loss: 1.384630 test_acc: 0.521200
epoch: 18000 train_loss: 0.907300 train_acc: 0.695312 test_loss: 1.396445 test_acc: 0.523000
epoch: 18100 train_loss: 1.065695 train_acc: 0.625000 test_loss: 1.456330 test_acc: 0.499900
epoch: 18200 train_loss: 1.066230 train_acc: 0.617188 test_loss: 1.394887 test_acc: 0.511600
epoch: 18300 train_loss: 1.057412 train_acc: 0.609375 test_loss: 1.391012 test_acc: 0.526000
epoch: 18400 train_loss: 0.945119 train_acc: 0.671875 test_loss: 1.395207 test_acc: 0.523700
epoch: 18500 train_loss: 1.109300 train_acc: 0.554688 test_loss: 1.371292 test_acc: 0.525600
epoch: 18600 train_loss: 1.008515 train_acc: 0.671875 test_loss: 1.394433 test_acc: 0.518500
epoch: 18700 train_loss: 1.118867 train_acc: 0.601562 test_loss: 1.425435 test_acc: 0.515200
epoch: 18800 train_loss: 1.189637 train_acc: 0.585938 test_loss: 1.405612 test_acc: 0.521200
epoch: 18900 train_loss: 1.073710 train_acc: 0.656250 test_loss: 1.443

epoch: 26800 train_loss: 1.094713 train_acc: 0.625000 test_loss: 1.522665 test_acc: 0.514900
epoch: 26900 train_loss: 1.004844 train_acc: 0.625000 test_loss: 1.530729 test_acc: 0.506800
epoch: 27000 train_loss: 0.798982 train_acc: 0.703125 test_loss: 1.505228 test_acc: 0.514600
epoch: 27100 train_loss: 0.801978 train_acc: 0.710938 test_loss: 1.557189 test_acc: 0.506200
epoch: 27200 train_loss: 0.917384 train_acc: 0.679688 test_loss: 1.511830 test_acc: 0.514400
epoch: 27300 train_loss: 0.769900 train_acc: 0.726562 test_loss: 1.474409 test_acc: 0.528600
epoch: 27400 train_loss: 1.163465 train_acc: 0.546875 test_loss: 1.630245 test_acc: 0.499500
epoch: 27500 train_loss: 0.618889 train_acc: 0.781250 test_loss: 1.531214 test_acc: 0.511800
epoch: 27600 train_loss: 0.704379 train_acc: 0.750000 test_loss: 1.526545 test_acc: 0.517700
epoch: 27700 train_loss: 0.850740 train_acc: 0.656250 test_loss: 1.476267 test_acc: 0.520400
epoch: 27800 train_loss: 0.725762 train_acc: 0.734375 test_loss: 1.516

epoch: 35700 train_loss: 0.664759 train_acc: 0.757812 test_loss: 1.649075 test_acc: 0.517100
epoch: 35800 train_loss: 0.856585 train_acc: 0.617188 test_loss: 1.734248 test_acc: 0.507100
epoch: 35900 train_loss: 0.628241 train_acc: 0.750000 test_loss: 1.662446 test_acc: 0.523000
epoch: 36000 train_loss: 0.702631 train_acc: 0.726562 test_loss: 1.665413 test_acc: 0.511600
epoch: 36100 train_loss: 0.569608 train_acc: 0.812500 test_loss: 1.633066 test_acc: 0.522100
epoch: 36200 train_loss: 0.772157 train_acc: 0.765625 test_loss: 1.648816 test_acc: 0.519600
epoch: 36300 train_loss: 0.683905 train_acc: 0.773438 test_loss: 1.731671 test_acc: 0.507900
epoch: 36400 train_loss: 0.849356 train_acc: 0.718750 test_loss: 1.661178 test_acc: 0.502400
epoch: 36500 train_loss: 0.658499 train_acc: 0.742188 test_loss: 1.739575 test_acc: 0.518100
epoch: 36600 train_loss: 0.618782 train_acc: 0.765625 test_loss: 1.667466 test_acc: 0.518600
epoch: 36700 train_loss: 0.730223 train_acc: 0.757812 test_loss: 1.685

epoch: 44600 train_loss: 0.650535 train_acc: 0.781250 test_loss: 1.855866 test_acc: 0.517800
epoch: 44700 train_loss: 0.537456 train_acc: 0.820312 test_loss: 1.874271 test_acc: 0.515700
epoch: 44800 train_loss: 0.736504 train_acc: 0.757812 test_loss: 1.974136 test_acc: 0.499200
epoch: 44900 train_loss: 0.627527 train_acc: 0.781250 test_loss: 1.942474 test_acc: 0.509900
epoch: 45000 train_loss: 0.596070 train_acc: 0.796875 test_loss: 1.975639 test_acc: 0.505500
epoch: 45100 train_loss: 0.661211 train_acc: 0.734375 test_loss: 1.896845 test_acc: 0.516000
epoch: 45200 train_loss: 0.516432 train_acc: 0.828125 test_loss: 1.935654 test_acc: 0.509900
epoch: 45300 train_loss: 0.467156 train_acc: 0.843750 test_loss: 1.935143 test_acc: 0.509700
epoch: 45400 train_loss: 0.537132 train_acc: 0.835938 test_loss: 1.894907 test_acc: 0.508800
epoch: 45500 train_loss: 0.439207 train_acc: 0.875000 test_loss: 2.031152 test_acc: 0.508400
epoch: 45600 train_loss: 0.673584 train_acc: 0.804688 test_loss: 1.883

epoch: 53500 train_loss: 0.487253 train_acc: 0.804688 test_loss: 2.203557 test_acc: 0.498900
epoch: 53600 train_loss: 0.639714 train_acc: 0.765625 test_loss: 2.070501 test_acc: 0.509100
epoch: 53700 train_loss: 0.350347 train_acc: 0.882812 test_loss: 2.173338 test_acc: 0.515400
epoch: 53800 train_loss: 0.614710 train_acc: 0.789062 test_loss: 2.137292 test_acc: 0.506500
epoch: 53900 train_loss: 0.665704 train_acc: 0.789062 test_loss: 2.151165 test_acc: 0.502400
epoch: 54000 train_loss: 0.449576 train_acc: 0.843750 test_loss: 2.126003 test_acc: 0.509200
epoch: 54100 train_loss: 0.513142 train_acc: 0.781250 test_loss: 2.144339 test_acc: 0.502300
epoch: 54200 train_loss: 0.486641 train_acc: 0.812500 test_loss: 2.124817 test_acc: 0.503600
epoch: 54300 train_loss: 0.371794 train_acc: 0.867188 test_loss: 2.066318 test_acc: 0.509400
epoch: 54400 train_loss: 0.373840 train_acc: 0.867188 test_loss: 2.123518 test_acc: 0.507800
epoch: 54500 train_loss: 0.375350 train_acc: 0.859375 test_loss: 2.153

epoch: 62400 train_loss: 0.503376 train_acc: 0.796875 test_loss: 2.337972 test_acc: 0.510500
epoch: 62500 train_loss: 0.483680 train_acc: 0.828125 test_loss: 2.354361 test_acc: 0.508700
epoch: 62600 train_loss: 0.477350 train_acc: 0.812500 test_loss: 2.400017 test_acc: 0.504900
epoch: 62700 train_loss: 0.487084 train_acc: 0.820312 test_loss: 2.339417 test_acc: 0.514200
epoch: 62800 train_loss: 0.347481 train_acc: 0.882812 test_loss: 2.344681 test_acc: 0.495900
epoch: 62900 train_loss: 0.462760 train_acc: 0.851562 test_loss: 2.339230 test_acc: 0.508800
epoch: 63000 train_loss: 0.369245 train_acc: 0.890625 test_loss: 2.303529 test_acc: 0.509200
epoch: 63100 train_loss: 0.237087 train_acc: 0.929688 test_loss: 2.294321 test_acc: 0.510500
epoch: 63200 train_loss: 0.482088 train_acc: 0.820312 test_loss: 2.363024 test_acc: 0.493900
epoch: 63300 train_loss: 0.382300 train_acc: 0.859375 test_loss: 2.308924 test_acc: 0.512600
epoch: 63400 train_loss: 0.361473 train_acc: 0.882812 test_loss: 2.323

epoch: 71300 train_loss: 0.266982 train_acc: 0.890625 test_loss: 2.634072 test_acc: 0.508200
epoch: 71400 train_loss: 0.300945 train_acc: 0.890625 test_loss: 2.622581 test_acc: 0.503800
epoch: 71500 train_loss: 0.245989 train_acc: 0.898438 test_loss: 2.626547 test_acc: 0.498200
epoch: 71600 train_loss: 0.382591 train_acc: 0.867188 test_loss: 2.594345 test_acc: 0.493100
epoch: 71700 train_loss: 0.334686 train_acc: 0.875000 test_loss: 2.588607 test_acc: 0.503400
epoch: 71800 train_loss: 0.302363 train_acc: 0.882812 test_loss: 2.594764 test_acc: 0.508200
epoch: 71900 train_loss: 0.269205 train_acc: 0.914062 test_loss: 2.669623 test_acc: 0.509500
epoch: 72000 train_loss: 0.521500 train_acc: 0.828125 test_loss: 2.551553 test_acc: 0.505800
epoch: 72100 train_loss: 0.481643 train_acc: 0.812500 test_loss: 2.535856 test_acc: 0.506200
epoch: 72200 train_loss: 0.262178 train_acc: 0.906250 test_loss: 2.561046 test_acc: 0.511600
epoch: 72300 train_loss: 0.266837 train_acc: 0.929688 test_loss: 2.583

## 使用一层CNN+两层全连接训练MNIST

In [1]:
import numpy as np
import os
import sys
sys.path.append('..')
from dataset.mnist import load_mnist
from model.simpleConvNet import ConvNet
from utils.trainer import Trainer

# Load the MNIST train/test splits through the project loader with flatten=False
# (presumably keeps each sample as an image tensor rather than a flat vector -- confirm in dataset.mnist).
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# Build the ConvNet from model.simpleConvNet; input_dim matches a single 28x28 input plane.
network = ConvNet(
    input_dim=(1, 28, 28),
    conv_param={
        'filter_num': 30,
        'filter_size': 5,
        'pad': 0,
        'stride': 1,
    },
    hidden_size=100,
    output_size=10,
    weight_init_std=0.01,
)

# Hand the network and both data splits to the Trainer: 3 epochs, mini-batches of 100, AdaGrad with lr=0.001.
trainer = Trainer(
    network,
    x_train, t_train,
    x_test, t_test,
    epochs=3,
    mini_batch_size=100,
    optimizer='AdaGrad',
    optimizer_param={'lr': 0.001},
)

# Run the training loop; the progress lines shown below this cell are emitted during this call.
trainer.train()

epoch: 0 iteration:   0 train_loss:2.303503 train_acc:0.100000 test_loss:2.305879 test_acc:0.125000
epoch: 0 iteration:  50 train_loss:0.429958 train_acc:0.870000 test_loss:0.268638 test_acc:0.906250
epoch: 0 iteration: 100 train_loss:0.336095 train_acc:0.920000 test_loss:0.273559 test_acc:0.890625
epoch: 0 iteration: 150 train_loss:0.359413 train_acc:0.890000 test_loss:0.138331 test_acc:0.953125
epoch: 0 iteration: 200 train_loss:0.096486 train_acc:0.960000 test_loss:0.206923 test_acc:0.914062
epoch: 0 iteration: 250 train_loss:0.127101 train_acc:0.960000 test_loss:0.179519 test_acc:0.968750
epoch: 0 iteration: 300 train_loss:0.251645 train_acc:0.940000 test_loss:0.158335 test_acc:0.953125
epoch: 0 iteration: 350 train_loss:0.098709 train_acc:0.980000 test_loss:0.062182 test_acc:0.984375
epoch: 0 iteration: 400 train_loss:0.168454 train_acc:0.920000 test_loss:0.126980 test_acc:0.953125
epoch: 0 iteration: 450 train_loss:0.154842 train_acc:0.960000 test_loss:0.134728 test_acc:0.976562


## 使用一层CNN+两层全连接训练CIFAR10

In [None]:
import numpy as np
import os
import sys
sys.path.append('..')
from dataset.cifar10 import load_cifar10
from model.simpleConvNet import ConvNet
from utils.trainer import Trainer

# Load the CIFAR-10 train/test splits through the project loader with flatten=False
# (presumably keeps each sample as an image tensor -- confirm in dataset.cifar10).
(x_train, t_train), (x_test, t_test) = load_cifar10(flatten=False)

# Build the ConvNet from model.simpleConvNet for 3x32x32 inputs; pretrained=False is passed explicitly
# (presumably means "start from fresh weights" -- confirm in model.simpleConvNet).
network = ConvNet(
    input_dim=(3, 32, 32),
    conv_param={
        'filter_num': 50,
        'filter_size': 5,
        'pad': 0,
        'stride': 1,
    },
    hidden_size=100,
    output_size=10,
    weight_init_std=0.01,
    pretrained=False,
)

# Hand the network and both data splits to the Trainer: 10 epochs, mini-batches of 128, AdaGrad with lr=0.005.
trainer = Trainer(
    network,
    x_train, t_train,
    x_test, t_test,
    epochs=10,
    mini_batch_size=128,
    optimizer='AdaGrad',
    optimizer_param={'lr': 0.005},
)

# Run the training loop; the progress lines shown below this cell are emitted during this call.
trainer.train()

epoch: 0 iteration:   0 train_loss:2.303647 train_acc:0.078125 test_loss:3.578681 test_acc:0.070312
epoch: 0 iteration:  50 train_loss:2.139317 train_acc:0.218750 test_loss:2.314676 test_acc:0.187500
epoch: 0 iteration: 100 train_loss:2.010579 train_acc:0.242188 test_loss:1.959988 test_acc:0.343750
epoch: 0 iteration: 150 train_loss:1.963601 train_acc:0.335938 test_loss:1.892104 test_acc:0.382812
epoch: 0 iteration: 200 train_loss:1.853093 train_acc:0.296875 test_loss:1.742271 test_acc:0.367188
epoch: 0 iteration: 250 train_loss:1.767556 train_acc:0.320312 test_loss:1.731083 test_acc:0.414062
epoch: 0 iteration: 300 train_loss:1.717197 train_acc:0.359375 test_loss:1.725037 test_acc:0.359375
epoch: 0 iteration: 350 train_loss:1.616687 train_acc:0.437500 test_loss:1.587075 test_acc:0.406250
epoch: 1 iteration:   0 train_loss:1.510303 train_acc:0.484375 test_loss:1.643855 test_acc:0.421875
epoch: 1 iteration:  50 train_loss:1.508358 train_acc:0.484375 test_loss:1.712907 test_acc:0.343750


## 使用两层CNN+一层全连接训练CIFAR10

In [None]:
import numpy as np
import os
import sys
sys.path.append('..')
from dataset.cifar10 import load_cifar10
from model.twoConvNet import ConvNet
from utils.trainer import Trainer

# Load the CIFAR-10 train/test splits through the project loader with flatten=False
# (presumably keeps each sample as an image tensor -- confirm in dataset.cifar10).
(x_train, t_train), (x_test, t_test) = load_cifar10(flatten=False)

# NOTE(review): the conv_param1/conv_param2 keys below are spelled with spaces
# ('filter num', 'filter size'), while the single-conv cells in this notebook use
# underscores ('filter_num', 'filter_size'). Confirm which spelling model.twoConvNet
# actually reads; a mismatch would presumably fail with a KeyError when the network is built.
network = ConvNet(
    input_dim=(3, 32, 32),
    conv_param1={
        'filter num': 64,
        'filter size': 3,
        'pad': 1,
        'stride': 1,
    },
    conv_param2={
        'filter num': 64,
        'filter size': 3,
        'pad': 1,
        'stride': 1,
    },
    output_size=10,
    weight_init_std=0.01,
    # NOTE(review): presumably loads previously saved weights -- confirm the source file in model.twoConvNet.
    pretrained=True,
)

# Hand the network and both data splits to the Trainer: 20 epochs, mini-batches of 128, AdaGrad with lr=0.001.
trainer = Trainer(
    network,
    x_train, t_train,
    x_test, t_test,
    epochs=20,
    mini_batch_size=128,
    optimizer='AdaGrad',
    optimizer_param={'lr': 0.001},
)

# Run the training loop.
trainer.train()