In [1]:
import numpy as np
import sys, os
sys.path.append(os.pardir + '/deep-learning-from-scratch/') 
from dataset.mnist import load_mnist
import pickle 
from common.util import im2col, col2im
from common.layers import Relu, SoftmaxWithLoss, Affine
import matplotlib.pyplot as plt
# from common.functions import *
# from common.gradient import numerical_gradient
# from tqdm import tqdm
from collections import OrderedDict
from common.trainer import Trainer

- im2colの引数
    - input_data：（データ数、チャンネル、高さ、横幅）
    - filter_h  ：フィルターの高さ
    - filter_w  ：フィルターの横幅
    - stride　　 : ストライド
    - pad   　　 : パディング

In [2]:
class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        # reshapeの引数に-1を定義することで、多次元配列の要素数の辻褄が合うように要素数をまとめてくれる
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b
        # transposeは多次元配列の軸の順序を変更させる
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0,2,3,1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx

class Pooling:
        def __init__(self, pool_h, pool_w, stride = 2, pad = 0):
             self.pool_h = pool_h
             self.pool_w = pool_w
             self.stride = stride
             self.pad = pad
             self.x = None
             self.arg_max = None
        
        def forward(self, x):
             N, C, H, W = x.shape
             out_h = int(1 + (H - self.pool_h) / self.stride)
             out_w = int(1 + (W - self.pool_w) / self.stride)

             col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
             col = col.reshape(-1, self.pool_h*self.pool_w)

             arg_max = np.argmax(col, axis=1)
             out = np.max(col, axis=1)
             out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

             self.x = x
             self.arg_max = arg_max

             return out

        def backward(self, dout):
            dout = dout.transpose(0, 2, 3, 1)
            
            pool_size = self.pool_h * self.pool_w
            dmax = np.zeros((dout.size, pool_size))
            dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
            dmax = dmax.reshape(dout.shape + (pool_size,)) 
            
            dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
            dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
            
            return dx

class SimpleConvNet():
    def __init__(self, 
                  input_dim = (1, 28, 28), 
                  conv_param = {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                  hidden_size = 100, output_size = 10, weight_init_std = 0.01):
          
          filter_num = conv_param['filter_num']
          filter_size = conv_param['filter_size']
          filter_pad = conv_param['pad']
          filter_stride = conv_param['stride']
          input_size = input_dim[1]
          conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
          pool_output_size = int(filter_num * (conv_output_size/2)*(conv_output_size/2))

          self.params = {}
          self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
          self.params['b1'] = np.zeros(filter_num)
          self.params['W2'] = weight_init_std * np.random.rand(pool_output_size, hidden_size)
          self.params['b2'] = np.zeros(hidden_size)
          self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
          self.params['b3'] = np.zeros(output_size)

          self.layers = OrderedDict()
          self.layers['Conv1'] = Convolution(self.params['W1'], 
                                             self.params['b1'], 
                                             conv_param['stride'], 
                                             conv_param['pad'])
          self.layers['Relu1'] = Relu()
          self.layers['Pool1'] = Pooling(pool_h = 2, pool_w = 2, stride = 2)
          self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
          self.layers['Relu1'] = Relu()
          self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
          self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
              x = layer.forward(x)

        return x

    def loss(self, x, t):
         y = self.predict(x)
         return self.lastLayer.forward(y, t)
        
    def gradient(self, x, t):
         # forward
         self.loss(x, t)

         # backward
         dout = 1
         dout = self.lastLayer.backward(dout)
         layers = list(self.layers.values())
         layers.reverse()
         for layer in layers:
              dout = layer.backward(dout)

         # 設定
         grads = {}
         grads['W1'] = self.layers['Conv1'].dW
         grads['b1'] = self.layers['Conv1'].db
         grads['W2'] = self.layers['Affine1'].dW
         grads['b2'] = self.layers['Affine1'].db
         grads['W3'] = self.layers['Affine2'].dW
         grads['b3'] = self.layers['Affine2'].db

         return grads
    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        
        acc = 0.0
        
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt) 
        
        return acc / x.shape[0]

In [4]:
# データの読み込み
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# 処理に時間のかかる場合はデータを削減 
x_train, t_train = x_train[:5000], t_train[:5000]
x_test, t_test = x_test[:1000], t_test[:1000]

max_epochs = 20

network = SimpleConvNet(input_dim=(1,28,28), 
                        conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
                        
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()


  0%|          | 0/1000 [00:00<?, ?it/s]

train loss:2.2968923965500205


  0%|          | 1/1000 [00:02<49:52,  3.00s/it]

=== epoch:1, train acc:0.1, test acc:0.094 ===


  0%|          | 2/1000 [00:03<24:35,  1.48s/it]

train loss:2.2878365996175147


  0%|          | 3/1000 [00:03<16:24,  1.01it/s]

train loss:2.298992671156892


  0%|          | 4/1000 [00:04<12:30,  1.33it/s]

train loss:2.2831278137888344


  0%|          | 5/1000 [00:04<10:24,  1.59it/s]

train loss:2.270209512529116


  1%|          | 6/1000 [00:05<09:14,  1.79it/s]

train loss:2.2606027303698113


  1%|          | 7/1000 [00:05<08:11,  2.02it/s]

train loss:2.273074589647546


  1%|          | 8/1000 [00:05<07:50,  2.11it/s]

train loss:2.244316980805541


  1%|          | 9/1000 [00:06<07:39,  2.16it/s]

train loss:2.2051606538758324


  1%|          | 10/1000 [00:06<07:17,  2.26it/s]

train loss:2.205334574952725


  1%|          | 11/1000 [00:07<07:08,  2.31it/s]

train loss:2.1522563494324727


  1%|          | 12/1000 [00:07<07:03,  2.33it/s]

train loss:2.147392175413135


  1%|▏         | 13/1000 [00:07<06:43,  2.45it/s]

train loss:2.141609349139573


  1%|▏         | 14/1000 [00:08<06:41,  2.46it/s]

train loss:2.084519139462021


  2%|▏         | 15/1000 [00:08<06:44,  2.44it/s]

train loss:2.0479631105610387


  2%|▏         | 16/1000 [00:09<06:43,  2.44it/s]

train loss:1.9502598924777645


  2%|▏         | 17/1000 [00:09<06:29,  2.52it/s]

train loss:1.8742118173838949


  2%|▏         | 18/1000 [00:09<06:38,  2.47it/s]

train loss:1.7908477716506885


  2%|▏         | 19/1000 [00:10<06:23,  2.56it/s]

train loss:1.6934814443688735


  2%|▏         | 20/1000 [00:10<06:14,  2.62it/s]

train loss:1.7001552976278742


  2%|▏         | 21/1000 [00:11<06:23,  2.55it/s]

train loss:1.615003433720853


  2%|▏         | 22/1000 [00:11<06:14,  2.61it/s]

train loss:1.5262200927955079


  2%|▏         | 23/1000 [00:11<06:27,  2.52it/s]

train loss:1.3550542289356746


  2%|▏         | 24/1000 [00:12<06:21,  2.56it/s]

train loss:1.2405595686249848


  2%|▎         | 25/1000 [00:12<06:13,  2.61it/s]

train loss:1.2821668367723875


  3%|▎         | 26/1000 [00:12<06:23,  2.54it/s]

train loss:1.0956788429296602


  3%|▎         | 27/1000 [00:13<06:33,  2.47it/s]

train loss:1.0078338589407096


  3%|▎         | 28/1000 [00:13<06:39,  2.43it/s]

train loss:0.9228182432518693


  3%|▎         | 29/1000 [00:14<06:28,  2.50it/s]

train loss:0.9567396243021112


  3%|▎         | 30/1000 [00:14<06:30,  2.48it/s]

train loss:0.9173549599469598


  3%|▎         | 31/1000 [00:15<06:28,  2.49it/s]

train loss:0.9304121330474676


  3%|▎         | 32/1000 [00:15<06:35,  2.45it/s]

train loss:0.7648157665014681


  3%|▎         | 33/1000 [00:15<06:36,  2.44it/s]

train loss:0.6548207449045874


  3%|▎         | 34/1000 [00:16<06:39,  2.42it/s]

train loss:0.792658730524598


  4%|▎         | 35/1000 [00:16<06:30,  2.47it/s]

train loss:0.6923583950609691


  4%|▎         | 36/1000 [00:17<06:28,  2.48it/s]

train loss:0.5662456655692171


  4%|▎         | 37/1000 [00:17<06:30,  2.47it/s]

train loss:0.7422239113914495


  4%|▍         | 38/1000 [00:17<06:25,  2.49it/s]

train loss:0.7563875470423175


  4%|▍         | 39/1000 [00:18<06:28,  2.47it/s]

train loss:0.572985619494768


  4%|▍         | 40/1000 [00:18<06:31,  2.45it/s]

train loss:0.4396766765200949


  4%|▍         | 41/1000 [00:19<06:33,  2.44it/s]

train loss:0.6277947942520175


  4%|▍         | 42/1000 [00:19<06:36,  2.42it/s]

train loss:0.5371277977677574


  4%|▍         | 43/1000 [00:19<06:26,  2.47it/s]

train loss:0.6604593963921175


  4%|▍         | 44/1000 [00:20<06:25,  2.48it/s]

train loss:0.472339968089463


  4%|▍         | 45/1000 [00:20<06:28,  2.46it/s]

train loss:0.3633656451942621


  5%|▍         | 46/1000 [00:21<06:24,  2.48it/s]

train loss:0.35450183431969357


  5%|▍         | 47/1000 [00:21<06:27,  2.46it/s]

train loss:0.601260747210799


  5%|▍         | 48/1000 [00:21<06:32,  2.42it/s]

train loss:0.41697656025051827


  5%|▍         | 49/1000 [00:22<06:34,  2.41it/s]

train loss:0.30575361988435096


  5%|▌         | 50/1000 [00:22<06:38,  2.38it/s]

train loss:0.47170236759807876
train loss:0.4493506252306432


  5%|▌         | 51/1000 [00:25<18:40,  1.18s/it]

=== epoch:2, train acc:0.855, test acc:0.842 ===


  5%|▌         | 52/1000 [00:26<14:49,  1.07it/s]

train loss:0.8290679439964034


  5%|▌         | 53/1000 [00:26<12:04,  1.31it/s]

train loss:0.5142341694071184


  5%|▌         | 54/1000 [00:26<10:10,  1.55it/s]

train loss:0.34547146537685264


  6%|▌         | 55/1000 [00:27<09:09,  1.72it/s]

train loss:0.49727018237749837


  6%|▌         | 56/1000 [00:27<08:27,  1.86it/s]

train loss:0.3526689645973636


  6%|▌         | 57/1000 [00:28<08:00,  1.96it/s]

train loss:0.29019809618136705


  6%|▌         | 58/1000 [00:28<07:26,  2.11it/s]

train loss:0.5417070538650685


  6%|▌         | 59/1000 [00:28<07:10,  2.19it/s]

train loss:0.3594868000254092


  6%|▌         | 60/1000 [00:29<06:56,  2.26it/s]

train loss:0.23580011809309454


  6%|▌         | 61/1000 [00:29<06:34,  2.38it/s]

train loss:0.38542142567946236


  6%|▌         | 62/1000 [00:30<06:15,  2.50it/s]

train loss:0.32648945805118723


  6%|▋         | 63/1000 [00:30<06:15,  2.49it/s]

train loss:0.47185268276886383


  6%|▋         | 64/1000 [00:30<06:18,  2.48it/s]

train loss:0.6778911879892623


  6%|▋         | 65/1000 [00:31<06:07,  2.55it/s]

train loss:0.46498677927710186


  7%|▋         | 66/1000 [00:31<05:53,  2.64it/s]

train loss:0.4164889406940522


  7%|▋         | 67/1000 [00:32<05:58,  2.60it/s]

train loss:0.41892777144685023


  7%|▋         | 68/1000 [00:32<06:04,  2.56it/s]

train loss:0.28628704894595125


  7%|▋         | 69/1000 [00:32<05:58,  2.60it/s]

train loss:0.37820037555820313


  7%|▋         | 70/1000 [00:33<05:55,  2.62it/s]

train loss:0.4168916065633291


  7%|▋         | 71/1000 [00:33<05:59,  2.59it/s]

train loss:0.26335027037538183


  7%|▋         | 72/1000 [00:33<06:05,  2.54it/s]

train loss:0.264524454564984


  7%|▋         | 73/1000 [00:34<05:57,  2.59it/s]

train loss:0.35878328125038605


  7%|▋         | 74/1000 [00:34<05:47,  2.66it/s]

train loss:0.43924975096937113


  8%|▊         | 75/1000 [00:35<05:53,  2.61it/s]

train loss:0.21258260600268242


  8%|▊         | 76/1000 [00:35<06:02,  2.55it/s]

train loss:0.42252933295997863


  8%|▊         | 77/1000 [00:35<05:55,  2.59it/s]

train loss:0.24012183286555353


  8%|▊         | 78/1000 [00:36<05:47,  2.65it/s]

train loss:0.28557268616134307


  8%|▊         | 79/1000 [00:36<05:53,  2.60it/s]

train loss:0.2813304436142299


  8%|▊         | 80/1000 [00:37<06:00,  2.55it/s]

train loss:0.32922805263806504


  8%|▊         | 81/1000 [00:37<05:52,  2.61it/s]

train loss:0.28276753959794787


  8%|▊         | 82/1000 [00:37<05:43,  2.67it/s]

train loss:0.2305841948240749


  8%|▊         | 83/1000 [00:38<05:51,  2.61it/s]

train loss:0.2604135962099844


  8%|▊         | 84/1000 [00:38<05:58,  2.56it/s]

train loss:0.34900590743438004


  8%|▊         | 85/1000 [00:38<05:50,  2.61it/s]

train loss:0.25512631095332583


  9%|▊         | 86/1000 [00:39<05:41,  2.68it/s]

train loss:0.23540825544198668


  9%|▊         | 87/1000 [00:39<05:49,  2.61it/s]

train loss:0.33429490025258823


  9%|▉         | 88/1000 [00:40<05:56,  2.56it/s]

train loss:0.3693344292924845


  9%|▉         | 89/1000 [00:40<05:49,  2.61it/s]

train loss:0.23790179448358223


  9%|▉         | 90/1000 [00:40<05:40,  2.67it/s]

train loss:0.236744110552344


  9%|▉         | 91/1000 [00:41<05:48,  2.61it/s]

train loss:0.23612169466101868


  9%|▉         | 92/1000 [00:41<05:56,  2.55it/s]

train loss:0.37482333416251584


  9%|▉         | 93/1000 [00:42<05:48,  2.60it/s]

train loss:0.42371122322491706


  9%|▉         | 94/1000 [00:42<05:39,  2.67it/s]

train loss:0.3340910666900808


 10%|▉         | 95/1000 [00:42<05:46,  2.61it/s]

train loss:0.3375356608152562


 10%|▉         | 96/1000 [00:43<05:52,  2.56it/s]

train loss:0.33214675032383745


 10%|▉         | 97/1000 [00:43<05:44,  2.62it/s]

train loss:0.3002696199273066


 10%|▉         | 98/1000 [00:43<05:37,  2.68it/s]

train loss:0.2854390288888809


 10%|▉         | 99/1000 [00:44<05:44,  2.61it/s]

train loss:0.1606371541459878


 10%|█         | 100/1000 [00:44<05:52,  2.55it/s]

train loss:0.321704975876915
train loss:0.19489351538741745


 10%|█         | 101/1000 [00:47<17:11,  1.15s/it]

=== epoch:3, train acc:0.9, test acc:0.888 ===


 10%|█         | 102/1000 [00:48<13:53,  1.08it/s]

train loss:0.18876988028447161


 10%|█         | 103/1000 [00:48<11:40,  1.28it/s]

train loss:0.36564803716339933


 10%|█         | 104/1000 [00:48<10:04,  1.48it/s]

train loss:0.36011624313024526


 10%|█         | 105/1000 [00:49<08:50,  1.69it/s]

train loss:0.24273664262607908


 11%|█         | 106/1000 [00:49<08:06,  1.84it/s]

train loss:0.21772089615713672


 11%|█         | 107/1000 [00:50<07:21,  2.02it/s]

train loss:0.22931138725250244


 11%|█         | 108/1000 [00:50<06:44,  2.20it/s]

train loss:0.2514655017999582


 11%|█         | 109/1000 [00:50<06:19,  2.35it/s]

train loss:0.10056263076384628


 11%|█         | 110/1000 [00:51<06:01,  2.46it/s]

train loss:0.25285189303848143


 11%|█         | 111/1000 [00:51<06:05,  2.43it/s]

train loss:0.22554123170488932


 11%|█         | 112/1000 [00:52<06:01,  2.46it/s]

train loss:0.3351185938879764


 11%|█▏        | 113/1000 [00:52<06:02,  2.45it/s]

train loss:0.2631658336959056


 11%|█▏        | 114/1000 [00:52<06:01,  2.45it/s]

train loss:0.3644571747851602


 12%|█▏        | 115/1000 [00:53<06:03,  2.44it/s]

train loss:0.19950611213815816


 12%|█▏        | 116/1000 [00:53<05:47,  2.54it/s]

train loss:0.2103388643854129


 12%|█▏        | 117/1000 [00:54<05:51,  2.52it/s]

train loss:0.34336224621789735


 12%|█▏        | 118/1000 [00:54<05:50,  2.51it/s]

train loss:0.3659824510859888


 12%|█▏        | 119/1000 [00:54<05:54,  2.48it/s]

train loss:0.3925093452072953


 12%|█▏        | 120/1000 [00:55<05:56,  2.47it/s]

train loss:0.20884235930724246


 12%|█▏        | 121/1000 [00:55<05:46,  2.54it/s]

train loss:0.2908492327850499


 12%|█▏        | 122/1000 [00:55<05:32,  2.64it/s]

train loss:0.27156770631651755


 12%|█▏        | 123/1000 [00:56<05:37,  2.60it/s]

train loss:0.258871799085917


 12%|█▏        | 124/1000 [00:56<05:44,  2.54it/s]

train loss:0.3194390134379258


 12%|█▎        | 125/1000 [00:57<05:36,  2.60it/s]

train loss:0.19775644802909426


 13%|█▎        | 126/1000 [00:57<05:27,  2.67it/s]

train loss:0.1714778457563804


 13%|█▎        | 127/1000 [00:57<05:33,  2.62it/s]

train loss:0.38037617155450165


 13%|█▎        | 128/1000 [00:58<05:45,  2.52it/s]

train loss:0.1884680086733843


 13%|█▎        | 129/1000 [00:58<05:35,  2.59it/s]

train loss:0.24620915856623773


 13%|█▎        | 130/1000 [00:59<05:24,  2.68it/s]

train loss:0.2620657542078395


 13%|█▎        | 131/1000 [00:59<05:35,  2.59it/s]

train loss:0.20675509586985208


 13%|█▎        | 132/1000 [00:59<05:40,  2.55it/s]

train loss:0.21250574728474775


 13%|█▎        | 133/1000 [01:00<05:36,  2.58it/s]

train loss:0.30956897268105127


 13%|█▎        | 134/1000 [01:00<05:25,  2.66it/s]

train loss:0.22275653690877958


 14%|█▎        | 135/1000 [01:00<05:31,  2.61it/s]

train loss:0.2186843354834719


 14%|█▎        | 136/1000 [01:01<05:37,  2.56it/s]

train loss:0.25475906990827746


 14%|█▎        | 137/1000 [01:01<05:31,  2.61it/s]

train loss:0.24388012895940658


 14%|█▍        | 138/1000 [01:02<05:22,  2.67it/s]

train loss:0.11342212934863813


 14%|█▍        | 139/1000 [01:02<05:28,  2.62it/s]

train loss:0.3286157077260102


 14%|█▍        | 140/1000 [01:02<05:34,  2.57it/s]

train loss:0.2489414236594045


 14%|█▍        | 141/1000 [01:03<05:35,  2.56it/s]

train loss:0.3451596132098818


 14%|█▍        | 142/1000 [01:03<05:25,  2.63it/s]

train loss:0.34907310038547507


 14%|█▍        | 143/1000 [01:04<05:31,  2.58it/s]

train loss:0.2889700589768613


 14%|█▍        | 144/1000 [01:04<05:37,  2.53it/s]

train loss:0.4828439265322505


 14%|█▍        | 145/1000 [01:04<05:30,  2.59it/s]

train loss:0.34801397655301747


 15%|█▍        | 146/1000 [01:05<05:22,  2.65it/s]

train loss:0.23968456772924465


 15%|█▍        | 147/1000 [01:05<05:27,  2.60it/s]

train loss:0.1873247585356006


 15%|█▍        | 148/1000 [01:05<05:34,  2.55it/s]

train loss:0.23546378993605213


 15%|█▍        | 149/1000 [01:06<05:27,  2.60it/s]

train loss:0.28890422744093647


 15%|█▌        | 150/1000 [01:06<05:18,  2.67it/s]

train loss:0.20272019122895724
train loss:0.40161959600517877


 15%|█▌        | 151/1000 [01:09<16:20,  1.15s/it]

=== epoch:4, train acc:0.921, test acc:0.907 ===


 15%|█▌        | 152/1000 [01:10<13:11,  1.07it/s]

train loss:0.15724018585984248


 15%|█▌        | 153/1000 [01:10<10:57,  1.29it/s]

train loss:0.29237726247846196


 15%|█▌        | 154/1000 [01:10<09:12,  1.53it/s]

train loss:0.2905801799522545


 16%|█▌        | 155/1000 [01:11<08:14,  1.71it/s]

train loss:0.12999365074571526


 16%|█▌        | 156/1000 [01:11<07:36,  1.85it/s]

train loss:0.19769911123508405


 16%|█▌        | 157/1000 [01:12<07:08,  1.97it/s]

train loss:0.251795380609442


 16%|█▌        | 158/1000 [01:12<06:47,  2.06it/s]

train loss:0.18240318386859772


 16%|█▌        | 159/1000 [01:12<06:22,  2.20it/s]

train loss:0.2610523848395903


 16%|█▌        | 160/1000 [01:13<06:12,  2.26it/s]

train loss:0.18972270319595388


 16%|█▌        | 161/1000 [01:13<06:05,  2.30it/s]

train loss:0.12693626186828919


 16%|█▌        | 162/1000 [01:14<05:47,  2.41it/s]

train loss:0.10785659705553359


 16%|█▋        | 163/1000 [01:14<05:51,  2.38it/s]

train loss:0.19701620180557008


 16%|█▋        | 164/1000 [01:14<05:32,  2.51it/s]

train loss:0.3520806742565727


 16%|█▋        | 165/1000 [01:15<05:34,  2.49it/s]

train loss:0.2894157209225576


 17%|█▋        | 166/1000 [01:15<05:46,  2.41it/s]

train loss:0.3175346607055797


 17%|█▋        | 167/1000 [01:16<05:51,  2.37it/s]

train loss:0.1702426082475032


 17%|█▋        | 168/1000 [01:16<05:52,  2.36it/s]

train loss:0.2092272163793125


 17%|█▋        | 169/1000 [01:17<05:45,  2.40it/s]

train loss:0.14864875653097898


 17%|█▋        | 170/1000 [01:17<05:44,  2.41it/s]

train loss:0.2307148592416079


 17%|█▋        | 171/1000 [01:17<05:42,  2.42it/s]

train loss:0.3006767381858851


 17%|█▋        | 172/1000 [01:18<05:31,  2.50it/s]

train loss:0.32419183604954555


 17%|█▋        | 173/1000 [01:18<05:19,  2.59it/s]

train loss:0.3253603618105676


 17%|█▋        | 174/1000 [01:19<05:25,  2.54it/s]

train loss:0.22249577531081757


 18%|█▊        | 175/1000 [01:19<05:29,  2.50it/s]

train loss:0.22337398735860955


 18%|█▊        | 176/1000 [01:19<05:22,  2.55it/s]

train loss:0.1861221867219461


 18%|█▊        | 177/1000 [01:20<05:14,  2.62it/s]

train loss:0.21363643408410596


 18%|█▊        | 178/1000 [01:20<05:22,  2.55it/s]

train loss:0.14368142866350786


 18%|█▊        | 179/1000 [01:21<05:27,  2.50it/s]

train loss:0.1836206000748511


 18%|█▊        | 180/1000 [01:21<05:19,  2.56it/s]

train loss:0.2720245113751011


 18%|█▊        | 181/1000 [01:21<05:11,  2.63it/s]

train loss:0.22202778089462694


 18%|█▊        | 182/1000 [01:22<05:18,  2.57it/s]

train loss:0.18022495981546446


 18%|█▊        | 183/1000 [01:22<05:23,  2.53it/s]

train loss:0.30881035093300296


 18%|█▊        | 184/1000 [01:22<05:15,  2.59it/s]

train loss:0.19606609923226093


 18%|█▊        | 185/1000 [01:23<05:07,  2.65it/s]

train loss:0.24689364753907214


 19%|█▊        | 186/1000 [01:23<05:13,  2.60it/s]

train loss:0.21287353074469575


 19%|█▊        | 187/1000 [01:24<05:19,  2.54it/s]

train loss:0.18742584815740443


 19%|█▉        | 188/1000 [01:24<05:12,  2.60it/s]

train loss:0.15542762903087867


 19%|█▉        | 189/1000 [01:24<05:04,  2.66it/s]

train loss:0.10503781940737002


 19%|█▉        | 190/1000 [01:25<05:11,  2.60it/s]

train loss:0.14855840532477488


 19%|█▉        | 191/1000 [01:25<05:17,  2.55it/s]

train loss:0.2724334486278186


 19%|█▉        | 192/1000 [01:26<05:14,  2.57it/s]

train loss:0.3407922773447063


 19%|█▉        | 193/1000 [01:26<05:04,  2.65it/s]

train loss:0.1918172005085739


 19%|█▉        | 194/1000 [01:26<05:09,  2.60it/s]

train loss:0.28962730111359847


 20%|█▉        | 195/1000 [01:27<05:15,  2.55it/s]

train loss:0.28426704686319654


 20%|█▉        | 196/1000 [01:27<05:08,  2.61it/s]

train loss:0.2375723605760215


 20%|█▉        | 197/1000 [01:27<05:00,  2.67it/s]

train loss:0.18829929543913643


 20%|█▉        | 198/1000 [01:28<05:06,  2.61it/s]

train loss:0.24408408727378386


 20%|█▉        | 199/1000 [01:28<05:13,  2.55it/s]

train loss:0.2798444516625453


 20%|██        | 200/1000 [01:29<05:08,  2.60it/s]

train loss:0.13791542973394477
train loss:0.11242774197648236


 20%|██        | 201/1000 [01:32<15:19,  1.15s/it]

=== epoch:5, train acc:0.945, test acc:0.928 ===


 20%|██        | 202/1000 [01:32<12:11,  1.09it/s]

train loss:0.25445281255280466


 20%|██        | 203/1000 [01:32<09:57,  1.33it/s]

train loss:0.2400745009209292


 20%|██        | 204/1000 [01:33<08:25,  1.57it/s]

train loss:0.10784945103810355


 20%|██        | 205/1000 [01:33<07:27,  1.78it/s]

train loss:0.1933368206237881


 21%|██        | 206/1000 [01:33<06:48,  1.94it/s]

train loss:0.16060281529654982


 21%|██        | 207/1000 [01:34<06:26,  2.05it/s]

train loss:0.18558318594595619


 21%|██        | 208/1000 [01:34<06:08,  2.15it/s]

train loss:0.19847603795673593


 21%|██        | 209/1000 [01:35<05:50,  2.26it/s]

train loss:0.1636362681114386


 21%|██        | 210/1000 [01:35<05:42,  2.31it/s]

train loss:0.1823382965787115


 21%|██        | 211/1000 [01:35<05:38,  2.33it/s]

train loss:0.11731111231447994


 21%|██        | 212/1000 [01:36<05:38,  2.33it/s]

train loss:0.18052624615729965


 21%|██▏       | 213/1000 [01:36<05:36,  2.34it/s]

train loss:0.19341542684555757


 21%|██▏       | 214/1000 [01:37<05:37,  2.33it/s]

train loss:0.11190083157161329


 22%|██▏       | 215/1000 [01:37<05:27,  2.40it/s]

train loss:0.24078005460089663


 22%|██▏       | 216/1000 [01:38<05:25,  2.41it/s]

train loss:0.13796171893666728


 22%|██▏       | 217/1000 [01:38<05:24,  2.41it/s]

train loss:0.18566721424650853


 22%|██▏       | 218/1000 [01:38<05:19,  2.45it/s]

train loss:0.11354985450396912


 22%|██▏       | 219/1000 [01:39<05:19,  2.44it/s]

train loss:0.18303888608656485


 22%|██▏       | 220/1000 [01:39<05:25,  2.40it/s]

train loss:0.2993384361937925


 22%|██▏       | 221/1000 [01:40<05:26,  2.39it/s]

train loss:0.17445850821773448


 22%|██▏       | 222/1000 [01:40<05:29,  2.36it/s]

train loss:0.1886396141716588


 22%|██▏       | 223/1000 [01:40<05:20,  2.43it/s]

train loss:0.152695981821444


 22%|██▏       | 224/1000 [01:41<05:19,  2.43it/s]

train loss:0.16434221197770552


 22%|██▎       | 225/1000 [01:41<05:20,  2.42it/s]

train loss:0.1240522592779899


 23%|██▎       | 226/1000 [01:42<05:15,  2.46it/s]

train loss:0.17304078455713148


 23%|██▎       | 227/1000 [01:42<05:26,  2.37it/s]

train loss:0.15034477866865992


 23%|██▎       | 228/1000 [01:43<05:26,  2.36it/s]

train loss:0.17923953482404686


 23%|██▎       | 229/1000 [01:43<05:23,  2.38it/s]

train loss:0.16552534316083733


 23%|██▎       | 230/1000 [01:43<05:24,  2.37it/s]

train loss:0.16413888430964943


 23%|██▎       | 231/1000 [01:44<05:16,  2.43it/s]

train loss:0.15874950840096566


 23%|██▎       | 232/1000 [01:44<05:16,  2.43it/s]

train loss:0.10285495299374028


 23%|██▎       | 233/1000 [01:45<05:20,  2.39it/s]

train loss:0.14654994760862267


 23%|██▎       | 234/1000 [01:45<05:25,  2.35it/s]

train loss:0.1401236799582524


 24%|██▎       | 235/1000 [01:46<05:31,  2.31it/s]

train loss:0.1097620784294532


 24%|██▎       | 236/1000 [01:46<05:29,  2.32it/s]

train loss:0.13077006660144752


 24%|██▎       | 237/1000 [01:46<05:25,  2.34it/s]

train loss:0.1894711978491776


 24%|██▍       | 238/1000 [01:47<05:25,  2.34it/s]

train loss:0.13871740934793877


 24%|██▍       | 239/1000 [01:47<05:18,  2.39it/s]

train loss:0.18926664930574602


 24%|██▍       | 240/1000 [01:48<05:17,  2.40it/s]

train loss:0.15569149324591214


 24%|██▍       | 241/1000 [01:48<05:16,  2.40it/s]

train loss:0.09553324218905955


 24%|██▍       | 242/1000 [01:48<05:13,  2.42it/s]

train loss:0.13845460830431375


 24%|██▍       | 243/1000 [01:49<05:15,  2.40it/s]

train loss:0.07504354748033991


 24%|██▍       | 244/1000 [01:49<05:18,  2.37it/s]

train loss:0.12321480155177583


 24%|██▍       | 245/1000 [01:50<05:18,  2.37it/s]

train loss:0.12266935966438762


 25%|██▍       | 246/1000 [01:50<05:20,  2.35it/s]

train loss:0.17868164623517843


 25%|██▍       | 247/1000 [01:51<05:11,  2.42it/s]

train loss:0.17403974829788957


 25%|██▍       | 248/1000 [01:51<05:10,  2.42it/s]

train loss:0.12573232609089754


 25%|██▍       | 249/1000 [01:51<05:11,  2.41it/s]

train loss:0.09140388399431297


 25%|██▌       | 250/1000 [01:52<05:06,  2.45it/s]

train loss:0.13104396172485086
train loss:0.2855137123710112


 25%|██▌       | 251/1000 [01:55<14:44,  1.18s/it]

=== epoch:6, train acc:0.948, test acc:0.928 ===


 25%|██▌       | 252/1000 [01:55<11:39,  1.07it/s]

train loss:0.10589708132850911


 25%|██▌       | 253/1000 [01:55<09:32,  1.31it/s]

train loss:0.12546047178086636


 25%|██▌       | 254/1000 [01:56<08:16,  1.50it/s]

train loss:0.19636879077897476


 26%|██▌       | 255/1000 [01:56<07:24,  1.68it/s]

train loss:0.1393526706841145


 26%|██▌       | 256/1000 [01:57<06:47,  1.83it/s]

train loss:0.16483778417655284


 26%|██▌       | 257/1000 [01:57<06:09,  2.01it/s]

train loss:0.20325749473418683


 26%|██▌       | 258/1000 [01:58<05:37,  2.20it/s]

train loss:0.1964696308027082


 26%|██▌       | 259/1000 [01:58<05:17,  2.33it/s]

train loss:0.14982183353498588


 26%|██▌       | 260/1000 [01:58<05:03,  2.44it/s]

train loss:0.15423824368010464


 26%|██▌       | 261/1000 [01:59<05:07,  2.41it/s]

train loss:0.19418407119567382


 26%|██▌       | 262/1000 [01:59<05:02,  2.44it/s]

train loss:0.20400676113430244


 26%|██▋       | 263/1000 [01:59<05:02,  2.43it/s]

train loss:0.09913776933830888


 26%|██▋       | 264/1000 [02:00<05:02,  2.43it/s]

train loss:0.1962097821855798


 26%|██▋       | 265/1000 [02:00<05:02,  2.43it/s]

train loss:0.1318107380060819


 27%|██▋       | 266/1000 [02:01<04:49,  2.54it/s]

train loss:0.09345257309453191


 27%|██▋       | 267/1000 [02:01<04:50,  2.52it/s]

train loss:0.06457270658614857


 27%|██▋       | 268/1000 [02:01<04:49,  2.53it/s]

train loss:0.18082110156256082


 27%|██▋       | 269/1000 [02:02<04:54,  2.48it/s]

train loss:0.21297829339501725


 27%|██▋       | 270/1000 [02:02<04:55,  2.47it/s]

train loss:0.1455335693718656


 27%|██▋       | 271/1000 [02:03<04:47,  2.54it/s]

train loss:0.1026747221193396


 27%|██▋       | 272/1000 [02:03<04:37,  2.63it/s]

train loss:0.07913904214425141


 27%|██▋       | 273/1000 [02:03<04:42,  2.58it/s]

train loss:0.12584770552694724


 27%|██▋       | 274/1000 [02:04<04:47,  2.53it/s]

train loss:0.1236218596986975


 28%|██▊       | 275/1000 [02:04<04:40,  2.58it/s]

train loss:0.10008962439064628


 28%|██▊       | 276/1000 [02:05<04:33,  2.65it/s]

train loss:0.100061340260379


 28%|██▊       | 277/1000 [02:05<04:38,  2.60it/s]

train loss:0.1420606744249804


 28%|██▊       | 278/1000 [02:05<04:43,  2.54it/s]

train loss:0.277769588342648


 28%|██▊       | 279/1000 [02:06<04:39,  2.58it/s]

train loss:0.12171303742319976


 28%|██▊       | 280/1000 [02:06<04:32,  2.64it/s]

train loss:0.15418017305364354


 28%|██▊       | 281/1000 [02:06<04:37,  2.59it/s]

train loss:0.25918295879010517


 28%|██▊       | 282/1000 [02:07<04:43,  2.53it/s]

train loss:0.08920229139878336


 28%|██▊       | 283/1000 [02:07<04:37,  2.58it/s]

train loss:0.16549902664588223


 28%|██▊       | 284/1000 [02:08<04:30,  2.65it/s]

train loss:0.07314148806139024


 28%|██▊       | 285/1000 [02:08<04:34,  2.61it/s]

train loss:0.09630374247785824


 29%|██▊       | 286/1000 [02:08<04:39,  2.55it/s]

train loss:0.08257925303443572


 29%|██▊       | 287/1000 [02:09<04:34,  2.60it/s]

train loss:0.15422364554528825


 29%|██▉       | 288/1000 [02:09<04:26,  2.67it/s]

train loss:0.166852202297316


 29%|██▉       | 289/1000 [02:10<04:31,  2.62it/s]

train loss:0.053965545145594984


 29%|██▉       | 290/1000 [02:10<04:38,  2.55it/s]

train loss:0.12292019484578333


 29%|██▉       | 291/1000 [02:10<04:34,  2.59it/s]

train loss:0.1385560415877422


 29%|██▉       | 292/1000 [02:11<04:27,  2.64it/s]

train loss:0.0948599755196039


 29%|██▉       | 293/1000 [02:11<04:32,  2.59it/s]

train loss:0.0783348944367882


 29%|██▉       | 294/1000 [02:12<04:39,  2.53it/s]

train loss:0.10033789209714392


 30%|██▉       | 295/1000 [02:12<04:33,  2.58it/s]

train loss:0.06424658650076372


 30%|██▉       | 296/1000 [02:12<04:26,  2.65it/s]

train loss:0.20773821435379797


 30%|██▉       | 297/1000 [02:13<04:31,  2.59it/s]

train loss:0.10812039623127262


 30%|██▉       | 298/1000 [02:13<04:35,  2.55it/s]

train loss:0.11121978362238413


 30%|██▉       | 299/1000 [02:13<04:29,  2.60it/s]

train loss:0.05459688656730229


 30%|███       | 300/1000 [02:14<04:22,  2.66it/s]

train loss:0.131858026954337
train loss:0.148917965106323


 30%|███       | 301/1000 [02:17<13:27,  1.16s/it]

=== epoch:7, train acc:0.953, test acc:0.94 ===


 30%|███       | 302/1000 [02:17<10:48,  1.08it/s]

train loss:0.08038429594543926


 30%|███       | 303/1000 [02:18<08:59,  1.29it/s]

train loss:0.07755324672237834


 30%|███       | 304/1000 [02:18<07:32,  1.54it/s]

train loss:0.11753959003675311


 30%|███       | 305/1000 [02:18<06:45,  1.71it/s]

train loss:0.10747114150268787


 31%|███       | 306/1000 [02:19<06:13,  1.86it/s]

train loss:0.06142095273080242


 31%|███       | 307/1000 [02:19<05:51,  1.97it/s]

train loss:0.20097162898709403


 31%|███       | 308/1000 [02:20<05:35,  2.06it/s]

train loss:0.11307850505985853


 31%|███       | 309/1000 [02:20<05:15,  2.19it/s]

train loss:0.07914658188814393


 31%|███       | 310/1000 [02:20<05:08,  2.24it/s]

train loss:0.10682902085511775


 31%|███       | 311/1000 [02:21<05:01,  2.28it/s]

train loss:0.049480225321724074


 31%|███       | 312/1000 [02:21<04:48,  2.38it/s]

train loss:0.15973901431736134


 31%|███▏      | 313/1000 [02:22<04:51,  2.36it/s]

train loss:0.11016866895274535


 31%|███▏      | 314/1000 [02:22<04:35,  2.49it/s]

train loss:0.14819385342388441


 32%|███▏      | 315/1000 [02:22<04:34,  2.50it/s]

train loss:0.09924196909962277


 32%|███▏      | 316/1000 [02:23<04:41,  2.43it/s]

train loss:0.0891493753073443


 32%|███▏      | 317/1000 [02:23<04:45,  2.39it/s]

train loss:0.2821047790908222


 32%|███▏      | 318/1000 [02:24<04:47,  2.37it/s]

train loss:0.054285756060130905


 32%|███▏      | 319/1000 [02:24<04:39,  2.43it/s]

train loss:0.07187447636231918


 32%|███▏      | 320/1000 [02:25<04:39,  2.44it/s]

train loss:0.09811457594169909


 32%|███▏      | 321/1000 [02:25<04:38,  2.44it/s]

train loss:0.10694236710293238


 32%|███▏      | 322/1000 [02:25<04:28,  2.52it/s]

train loss:0.09126190260004607


 32%|███▏      | 323/1000 [02:26<04:19,  2.61it/s]

train loss:0.05703220220450758


 32%|███▏      | 324/1000 [02:26<04:22,  2.58it/s]

train loss:0.05813666909836648


 32%|███▎      | 325/1000 [02:26<04:26,  2.53it/s]

train loss:0.17971711548730393


 33%|███▎      | 326/1000 [02:27<04:20,  2.59it/s]

train loss:0.07906861751002765


 33%|███▎      | 327/1000 [02:27<04:13,  2.65it/s]

train loss:0.17544347060522572


 33%|███▎      | 328/1000 [02:28<04:19,  2.59it/s]

train loss:0.1899420380815813


 33%|███▎      | 329/1000 [02:28<04:24,  2.54it/s]

train loss:0.1248342979900336


 33%|███▎      | 330/1000 [02:28<04:19,  2.58it/s]

train loss:0.0819260501634512


 33%|███▎      | 331/1000 [02:29<04:13,  2.64it/s]

train loss:0.14579307483802403


 33%|███▎      | 332/1000 [02:29<04:18,  2.59it/s]

train loss:0.04340538372393788


 33%|███▎      | 333/1000 [02:30<04:23,  2.53it/s]

train loss:0.08110215032828538


 33%|███▎      | 334/1000 [02:30<04:17,  2.59it/s]

train loss:0.046960011366187375


 34%|███▎      | 335/1000 [02:30<04:10,  2.66it/s]

train loss:0.04540475953156849


 34%|███▎      | 336/1000 [02:31<04:15,  2.60it/s]

train loss:0.1604246507167842


 34%|███▎      | 337/1000 [02:31<04:20,  2.55it/s]

train loss:0.05515046781435279


 34%|███▍      | 338/1000 [02:31<04:13,  2.61it/s]

train loss:0.1132916999513507


 34%|███▍      | 339/1000 [02:32<04:07,  2.68it/s]

train loss:0.07580694816453647


 34%|███▍      | 340/1000 [02:32<04:12,  2.61it/s]

train loss:0.1030608092884184


 34%|███▍      | 341/1000 [02:33<04:18,  2.55it/s]

train loss:0.058939094662831495


 34%|███▍      | 342/1000 [02:33<04:12,  2.60it/s]

train loss:0.09805097833140274


 34%|███▍      | 343/1000 [02:33<04:06,  2.67it/s]

train loss:0.15893331645957876


 34%|███▍      | 344/1000 [02:34<04:11,  2.61it/s]

train loss:0.05120540737205709


 34%|███▍      | 345/1000 [02:34<04:16,  2.56it/s]

train loss:0.0704291496936055


 35%|███▍      | 346/1000 [02:35<04:11,  2.60it/s]

train loss:0.15926809584781973


 35%|███▍      | 347/1000 [02:35<04:05,  2.66it/s]

train loss:0.1409402787633293


 35%|███▍      | 348/1000 [02:35<04:11,  2.59it/s]

train loss:0.07387357692264605


 35%|███▍      | 349/1000 [02:36<04:16,  2.54it/s]

train loss:0.10512573351919373


 35%|███▌      | 350/1000 [02:36<04:10,  2.59it/s]

train loss:0.11757589167254741
train loss:0.08547791035152034


 35%|███▌      | 351/1000 [02:39<12:26,  1.15s/it]

=== epoch:8, train acc:0.96, test acc:0.946 ===


 35%|███▌      | 352/1000 [02:39<09:54,  1.09it/s]

train loss:0.08070813394947816


 35%|███▌      | 353/1000 [02:40<08:04,  1.34it/s]

train loss:0.05109509371396472


 35%|███▌      | 354/1000 [02:40<06:49,  1.58it/s]

train loss:0.08241722141403177


 36%|███▌      | 355/1000 [02:41<06:01,  1.78it/s]

train loss:0.10257326855564164


 36%|███▌      | 356/1000 [02:41<05:30,  1.95it/s]

train loss:0.0605995532017842


 36%|███▌      | 357/1000 [02:41<05:12,  2.06it/s]

train loss:0.09692057231315383


 36%|███▌      | 358/1000 [02:42<04:57,  2.16it/s]

train loss:0.05627542040160536


 36%|███▌      | 359/1000 [02:42<04:43,  2.26it/s]

train loss:0.09396020103895038


 36%|███▌      | 360/1000 [02:43<04:36,  2.31it/s]

train loss:0.08227562837225222


 36%|███▌      | 361/1000 [02:43<04:34,  2.33it/s]

train loss:0.16613920644828073


 36%|███▌      | 362/1000 [02:43<04:33,  2.33it/s]

train loss:0.08809729527802199


 36%|███▋      | 363/1000 [02:44<04:32,  2.34it/s]

train loss:0.029663475455848326


 36%|███▋      | 364/1000 [02:44<04:31,  2.34it/s]

train loss:0.0649780135285996


 36%|███▋      | 365/1000 [02:45<04:23,  2.41it/s]

train loss:0.15665196218838928


 37%|███▋      | 366/1000 [02:45<04:29,  2.36it/s]

train loss:0.1317339009516741


 37%|███▋      | 367/1000 [02:46<04:32,  2.32it/s]

train loss:0.036932123963487946


 37%|███▋      | 368/1000 [02:46<04:31,  2.33it/s]

train loss:0.0955101102790694


 37%|███▋      | 369/1000 [02:46<04:32,  2.31it/s]

train loss:0.09040960559177622


 37%|███▋      | 370/1000 [02:47<04:44,  2.21it/s]

train loss:0.1256012859740667


 37%|███▋      | 371/1000 [02:47<04:43,  2.22it/s]

train loss:0.1272674676235486


 37%|███▋      | 372/1000 [02:48<04:44,  2.21it/s]

train loss:0.05872689843070107


 37%|███▋      | 373/1000 [02:48<04:31,  2.31it/s]

train loss:0.0952408135632628


 37%|███▋      | 374/1000 [02:49<04:27,  2.34it/s]

train loss:0.15359127305403955


 38%|███▊      | 375/1000 [02:49<04:24,  2.36it/s]

train loss:0.08148274164220166


 38%|███▊      | 376/1000 [02:49<04:19,  2.40it/s]

train loss:0.06513233458927935


 38%|███▊      | 377/1000 [02:50<04:19,  2.40it/s]

train loss:0.2329243088141876


 38%|███▊      | 378/1000 [02:50<04:20,  2.39it/s]

train loss:0.05871474970146236


 38%|███▊      | 379/1000 [02:51<04:20,  2.39it/s]

train loss:0.11336451995199028


 38%|███▊      | 380/1000 [02:51<04:22,  2.36it/s]

train loss:0.19578300470508972


 38%|███▊      | 381/1000 [02:51<04:16,  2.42it/s]

train loss:0.09975366187948564


 38%|███▊      | 382/1000 [02:52<04:16,  2.41it/s]

train loss:0.038613391552203696


 38%|███▊      | 383/1000 [02:52<04:17,  2.40it/s]

train loss:0.08612724398769181


 38%|███▊      | 384/1000 [02:53<04:13,  2.43it/s]

train loss:0.08668238774832422


 38%|███▊      | 385/1000 [02:53<04:13,  2.43it/s]

train loss:0.06766404021922362


 39%|███▊      | 386/1000 [02:54<04:15,  2.41it/s]

train loss:0.07520218809215284


 39%|███▊      | 387/1000 [02:54<04:14,  2.40it/s]

train loss:0.1251901667100544


 39%|███▉      | 388/1000 [02:54<04:17,  2.38it/s]

train loss:0.07728382640850197


 39%|███▉      | 389/1000 [02:55<04:10,  2.44it/s]

train loss:0.19931622402018545


 39%|███▉      | 390/1000 [02:55<04:10,  2.44it/s]

train loss:0.07604624448500487


 39%|███▉      | 391/1000 [02:56<04:10,  2.43it/s]

train loss:0.1252784726410648


 39%|███▉      | 392/1000 [02:56<04:07,  2.46it/s]

train loss:0.11862704998568722


 39%|███▉      | 393/1000 [02:56<04:08,  2.44it/s]

train loss:0.08165722275650449


 39%|███▉      | 394/1000 [02:57<04:10,  2.42it/s]

train loss:0.08638363060802265


 40%|███▉      | 395/1000 [02:57<04:10,  2.41it/s]

train loss:0.0959731832664118


 40%|███▉      | 396/1000 [02:58<04:13,  2.39it/s]

train loss:0.10086448650362267


 40%|███▉      | 397/1000 [02:58<04:07,  2.44it/s]

train loss:0.19358536053907416


 40%|███▉      | 398/1000 [02:59<04:07,  2.43it/s]

train loss:0.06663865360372796


 40%|███▉      | 399/1000 [02:59<04:07,  2.42it/s]

train loss:0.16477340035415428


 40%|████      | 400/1000 [02:59<04:03,  2.46it/s]

train loss:0.07517466796181077
train loss:0.04998346463484273


 40%|████      | 401/1000 [03:02<11:49,  1.18s/it]

=== epoch:9, train acc:0.965, test acc:0.952 ===


 40%|████      | 402/1000 [03:03<09:21,  1.07it/s]

train loss:0.07283618731205911


 40%|████      | 403/1000 [03:03<07:37,  1.30it/s]

train loss:0.04491389414730343


 40%|████      | 404/1000 [03:03<06:35,  1.51it/s]

train loss:0.1693287434871279


 40%|████      | 405/1000 [03:04<05:53,  1.68it/s]

train loss:0.05621559867875044


 41%|████      | 406/1000 [03:04<05:23,  1.84it/s]

train loss:0.08997740301187238


 41%|████      | 407/1000 [03:05<04:53,  2.02it/s]

train loss:0.13003794925148424


 41%|████      | 408/1000 [03:05<04:28,  2.21it/s]

train loss:0.10528407269432505


 41%|████      | 409/1000 [03:05<04:12,  2.34it/s]

train loss:0.12954203242271667


 41%|████      | 410/1000 [03:06<04:01,  2.44it/s]

train loss:0.06178495207904429


 41%|████      | 411/1000 [03:06<04:03,  2.42it/s]

train loss:0.04041346702880346


 41%|████      | 412/1000 [03:07<04:00,  2.45it/s]

train loss:0.09585527373673443


 41%|████▏     | 413/1000 [03:07<04:00,  2.44it/s]

train loss:0.09789422970551148


 41%|████▏     | 414/1000 [03:07<03:59,  2.44it/s]

train loss:0.06034574596713311


 42%|████▏     | 415/1000 [03:08<04:00,  2.44it/s]

train loss:0.06637894640009039


 42%|████▏     | 416/1000 [03:08<03:50,  2.54it/s]

train loss:0.03087869989864438


 42%|████▏     | 417/1000 [03:09<03:51,  2.51it/s]

train loss:0.12153057968506552


 42%|████▏     | 418/1000 [03:09<03:50,  2.53it/s]

train loss:0.13883972297240238


 42%|████▏     | 419/1000 [03:09<03:52,  2.50it/s]

train loss:0.0794858041560228


 42%|████▏     | 420/1000 [03:10<03:54,  2.47it/s]

train loss:0.037484449591898304


 42%|████▏     | 421/1000 [03:10<03:47,  2.54it/s]

train loss:0.05332560099406921


 42%|████▏     | 422/1000 [03:11<03:39,  2.63it/s]

train loss:0.1071550087615681


 42%|████▏     | 423/1000 [03:11<03:42,  2.59it/s]

train loss:0.043844247660986005


 42%|████▏     | 424/1000 [03:11<03:46,  2.55it/s]

train loss:0.031108037234741436


 42%|████▎     | 425/1000 [03:12<03:41,  2.60it/s]

train loss:0.07647119232462078


 43%|████▎     | 426/1000 [03:12<03:35,  2.67it/s]

train loss:0.05380632313712802


 43%|████▎     | 427/1000 [03:12<03:39,  2.61it/s]

train loss:0.03811958449516424


 43%|████▎     | 428/1000 [03:13<03:44,  2.55it/s]

train loss:0.061208642378603934


 43%|████▎     | 429/1000 [03:13<03:40,  2.59it/s]

train loss:0.14781455546344932


 43%|████▎     | 430/1000 [03:14<03:34,  2.66it/s]

train loss:0.07307919746475496


 43%|████▎     | 431/1000 [03:14<03:38,  2.60it/s]

train loss:0.17048690938901484


 43%|████▎     | 432/1000 [03:14<03:42,  2.55it/s]

train loss:0.06186926251620121


 43%|████▎     | 433/1000 [03:15<03:38,  2.60it/s]

train loss:0.08178438575339982


 43%|████▎     | 434/1000 [03:15<03:32,  2.66it/s]

train loss:0.09157768352491341


 44%|████▎     | 435/1000 [03:16<03:36,  2.61it/s]

train loss:0.08647704483925332


 44%|████▎     | 436/1000 [03:16<03:40,  2.55it/s]

train loss:0.0356296602057026


 44%|████▎     | 437/1000 [03:16<03:35,  2.61it/s]

train loss:0.04032002048968362


 44%|████▍     | 438/1000 [03:17<03:30,  2.67it/s]

train loss:0.041173955587368694


 44%|████▍     | 439/1000 [03:17<03:34,  2.62it/s]

train loss:0.07037137032620805


 44%|████▍     | 440/1000 [03:17<03:39,  2.55it/s]

train loss:0.045144037688430665


 44%|████▍     | 441/1000 [03:18<03:35,  2.60it/s]

train loss:0.06043413081901417


 44%|████▍     | 442/1000 [03:18<03:28,  2.67it/s]

train loss:0.04592107123963762


 44%|████▍     | 443/1000 [03:19<03:32,  2.62it/s]

train loss:0.08415608888958896


 44%|████▍     | 444/1000 [03:19<03:37,  2.56it/s]

train loss:0.0934517940855399


 44%|████▍     | 445/1000 [03:19<03:32,  2.61it/s]

train loss:0.048292571004203956


 45%|████▍     | 446/1000 [03:20<03:27,  2.67it/s]

train loss:0.037254320886405685


 45%|████▍     | 447/1000 [03:20<03:32,  2.60it/s]

train loss:0.07339796368472794


 45%|████▍     | 448/1000 [03:21<03:38,  2.53it/s]

train loss:0.025673200042207682


 45%|████▍     | 449/1000 [03:21<03:34,  2.57it/s]

train loss:0.08661106421068253


 45%|████▌     | 450/1000 [03:21<03:28,  2.63it/s]

train loss:0.17322213754702076
train loss:0.044221682212802815


 45%|████▌     | 451/1000 [03:24<10:35,  1.16s/it]

=== epoch:10, train acc:0.974, test acc:0.953 ===


 45%|████▌     | 452/1000 [03:25<08:33,  1.07it/s]

train loss:0.06699684355110613


 45%|████▌     | 453/1000 [03:25<07:11,  1.27it/s]

train loss:0.06514892671933897


 45%|████▌     | 454/1000 [03:26<06:02,  1.50it/s]

train loss:0.059291201486780354


 46%|████▌     | 455/1000 [03:26<05:29,  1.65it/s]

train loss:0.1033024214402668


 46%|████▌     | 456/1000 [03:26<05:04,  1.78it/s]

train loss:0.04012083711098595


 46%|████▌     | 457/1000 [03:27<04:46,  1.89it/s]

train loss:0.10424017715252992


 46%|████▌     | 458/1000 [03:27<04:34,  1.97it/s]

train loss:0.035336944542869106


 46%|████▌     | 459/1000 [03:28<04:15,  2.12it/s]

train loss:0.08348298731846641


 46%|████▌     | 460/1000 [03:28<04:06,  2.19it/s]

train loss:0.07433866029907171


 46%|████▌     | 461/1000 [03:29<03:59,  2.25it/s]

train loss:0.04709326672046621


 46%|████▌     | 462/1000 [03:29<03:48,  2.35it/s]

train loss:0.11757631186007876


 46%|████▋     | 463/1000 [03:29<03:50,  2.33it/s]

train loss:0.10713159447008543


 46%|████▋     | 464/1000 [03:30<03:37,  2.46it/s]

train loss:0.1356436390203411


 46%|████▋     | 465/1000 [03:30<03:35,  2.48it/s]

train loss:0.07678483994691347


 47%|████▋     | 466/1000 [03:31<03:41,  2.41it/s]

train loss:0.05922900996699583


 47%|████▋     | 467/1000 [03:31<03:43,  2.38it/s]

train loss:0.10493150720610155


 47%|████▋     | 468/1000 [03:31<03:44,  2.37it/s]

train loss:0.06391009643654472


 47%|████▋     | 469/1000 [03:32<03:38,  2.43it/s]

train loss:0.04764176046235551


 47%|████▋     | 470/1000 [03:32<03:38,  2.43it/s]

train loss:0.08585687261303461


 47%|████▋     | 471/1000 [03:33<03:37,  2.43it/s]

train loss:0.05721656500348507


 47%|████▋     | 472/1000 [03:33<03:30,  2.51it/s]

train loss:0.04653423060125996


 47%|████▋     | 473/1000 [03:33<03:21,  2.61it/s]

train loss:0.06787979830782356


 47%|████▋     | 474/1000 [03:34<03:27,  2.53it/s]

train loss:0.0252025264340012


 48%|████▊     | 475/1000 [03:34<03:30,  2.50it/s]

train loss:0.02611574468757053


 48%|████▊     | 476/1000 [03:35<03:25,  2.55it/s]

train loss:0.04772647872769067


 48%|████▊     | 477/1000 [03:35<03:21,  2.60it/s]

train loss:0.14175242764143495


 48%|████▊     | 478/1000 [03:35<03:24,  2.55it/s]

train loss:0.019829693296298967


 48%|████▊     | 479/1000 [03:36<03:27,  2.51it/s]

train loss:0.059507097628997835


 48%|████▊     | 480/1000 [03:36<03:22,  2.57it/s]

train loss:0.05924078703983675


 48%|████▊     | 481/1000 [03:37<03:17,  2.63it/s]

train loss:0.04679259833292727


 48%|████▊     | 482/1000 [03:37<03:23,  2.55it/s]

train loss:0.036442848935603685


 48%|████▊     | 483/1000 [03:37<03:25,  2.51it/s]

train loss:0.09623101921285765


 48%|████▊     | 484/1000 [03:38<03:21,  2.56it/s]

train loss:0.031767900508618115


 48%|████▊     | 485/1000 [03:38<03:15,  2.63it/s]

train loss:0.06764485714702245


 49%|████▊     | 486/1000 [03:38<03:21,  2.55it/s]

train loss:0.1662252876711485


 49%|████▊     | 487/1000 [03:39<03:24,  2.51it/s]

train loss:0.04422885943306601


 49%|████▉     | 488/1000 [03:39<03:19,  2.56it/s]

train loss:0.034219414963064965


 49%|████▉     | 489/1000 [03:40<03:14,  2.63it/s]

train loss:0.08809007470384535


 49%|████▉     | 490/1000 [03:40<03:19,  2.56it/s]

train loss:0.12690812477493743


 49%|████▉     | 491/1000 [03:40<03:24,  2.49it/s]

train loss:0.0536995969040365


 49%|████▉     | 492/1000 [03:41<03:20,  2.53it/s]

train loss:0.0628445360857976


 49%|████▉     | 493/1000 [03:41<03:16,  2.58it/s]

train loss:0.035912991128837


 49%|████▉     | 494/1000 [03:42<03:20,  2.52it/s]

train loss:0.03085293880503356


 50%|████▉     | 495/1000 [03:42<03:19,  2.53it/s]

train loss:0.0642359635755766


 50%|████▉     | 496/1000 [03:42<03:16,  2.56it/s]

train loss:0.031988257546004734


 50%|████▉     | 497/1000 [03:43<03:15,  2.58it/s]

train loss:0.05164664108293019


 50%|████▉     | 498/1000 [03:43<03:17,  2.54it/s]

train loss:0.038426630652492515


 50%|████▉     | 499/1000 [03:44<03:19,  2.51it/s]

train loss:0.056128894319250326


 50%|█████     | 500/1000 [03:44<03:14,  2.57it/s]

train loss:0.04649792529270319


 50%|█████     | 500/1000 [03:44<03:44,  2.22it/s]

train loss:0.08729490893884399





KeyboardInterrupt: 

In [None]:
# パラメータの保存
network.save_params("params.pkl")
print("Saved Network Parameters!")

# グラフの描画
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
