In [None]:
from keras.datasets import cifar10

(Xtr, Ytr), (Xte, Yte) = cifar10.load_data() # a magic function we provide
print(Xtr)
# flatten out all images to be one-dimensional
Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072
Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072
print(Xtr_rows)

In [71]:
import math
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import randn

In [72]:
def softmax(x):
    e = np.exp(x-np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)
# cross-entropy loss to give a single number
def loss_error(y,t):
    return np.mean(np.sum(-t * np.log(y), axis =1))

In [73]:
class LinearClassifier:
  def __init__(self, input_size, output_size, initializer='random'):
      self.W = 0.1 * np.random.randn(input_size, output_size)
      self.b = 0.1 * np.zeros(output_size)
      self.x = None
      self.y = None
      self.dW = None # W gradient
      self.db = None # b gradient

  def forward(self, x):
      self.x = x # x값을 기억해 둠 (gradient 를 구할 때 사용)
      self.y = np.dot(self.x, self.W) + self.b
      return self.y

  def backward(self, d_out, learning_rate): #parameter update
      self.dW = np.dot(self.x.T, d_out)
      self.db = np.sum(d_out, axis=0)
      d_x = np.dot(d_out, self.W.T)
      self.W -= learning_rate * self.dW
      self.b -= learning_rate * self.db
      return d_x

In [74]:
# last layer
class SoftmaxWithLoss:
  def __init__(self):
      self.error = None
      self.y = None # 예측 답안
      self.t = None # 모범 답안

  def forward(self,x):
      self.y = softmax(x)
      return self.y

  def loss(self, t):
      self.t = t
      self.error = loss_error(self.y, self.t)
      return self.error

  def backward(self, d_out=1, learning_rate=None): #gradient
      batch_size = self.t.shape[0]
      d_x = (self.y - self.t) / batch_size
      return d_x

In [75]:
# one-hot encoder
def onehot(y, n_class):
    vectors = np.zeros((len(y), n_class))
    for i, label in enumerate(y):
        vectors[i, label] = 1
    return vectors

In [76]:
Xtr_rows = (Xtr_rows - 127.5) / 255
Xte_rows = (Xte_rows - 127.5) / 255

In [77]:
# check normalized data
print('Training set')
print('global mean = %.5f' % np.mean(Xtr_rows))
print('global std  = %.5f' % np.std(Xtr_rows))
print('global max = %.5f' % np.max(Xtr_rows))
print('global min = %.5f' % np.min(Xtr_rows))

print()
print('Test set')
print('global mean = %.5f' % np.mean(Xte_rows))
print('global std  = %.5f' % np.std(Xte_rows))
print('global max = %.5f' % np.max(Xte_rows))
print('global min = %.5f' % np.min(Xte_rows))

Training set
global mean = -0.02664
global std  = 0.25157
global max = 0.50000
global min = -0.50000

Test set
global mean = -0.02342
global std  = 0.25122
global max = 0.50000
global min = -0.50000


In [78]:
n_dim = Xtr_rows.shape[1]
n_class = 10

input_layer = LinearClassifier(n_dim,n_class)

print('shape of W =', input_layer.W.shape)
print('shape of b =', input_layer.b.shape)


shape of W = (3072, 10)
shape of b = (10,)


forward

In [79]:
# sample mini-batch
batch = Xtr_rows[:, :] # the first 4 images from training set
print('shape of mini batch =', batch.shape)

shape of mini batch = (50000, 3072)


In [80]:
# output
output = input_layer.forward(batch)
print('shape of output =', output.shape) # each row assigns 10 numbers 
print(output)

shape of output = (50000, 10)
[[ 0.08261961  0.58470427 -1.19271237 ...  0.18692303 -0.42907895
  -0.87231547]
 [-1.69690341 -0.16766308 -1.5709959  ...  2.70656894 -0.13687381
  -2.31830144]
 [ 0.6070247   2.28507757  3.11676591 ... -0.29967361 -1.69714993
  -1.3510628 ]
 ...
 [-2.14081227  2.59660927 -1.09144735 ...  0.88665048  0.14309149
  -1.04116018]
 [ 0.76241264  0.71065485  3.4641438  ...  1.22326812 -1.4425265
  -0.52464761]
 [ 1.42783289 -1.70467183  0.53753457 ... -0.66456794 -3.01965724
  -1.13970823]]


In [81]:
answer = Ytr[:, :]
print('shape of mini batch =', answer.shape)
print(answer)

shape of mini batch = (50000, 1)
[[6]
 [9]
 [9]
 ...
 [9]
 [1]
 [1]]


In [82]:
answer = onehot(Ytr[:], n_class)
print(answer)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [83]:
loss = SoftmaxWithLoss()
loss.forward(output)
first_loss = loss.loss(answer)
print(first_loss)

3.3016645006183354


In [84]:
np.argmax(output, axis=1)

array([1, 7, 6, ..., 5, 2, 6])

**backward**

In [85]:
learning_rate = 0.1
q = loss.backward()
r = input_layer.backward(q,learning_rate)

In [86]:
# output
output2 = input_layer.forward(batch)
print('shape of output =', output2.shape) # each row assigns 10 numbers 

shape of output = (50000, 10)


In [87]:
loss = SoftmaxWithLoss()
loss.forward(output2)
second_loss = loss.loss(answer)
print(first_loss)
print(second_loss)

3.3016645006183354
2.91116293380188


In [88]:
# model prediction
np.argmax(output2, axis=1)

array([1, 7, 6, ..., 1, 2, 6])

반복

In [89]:
for i in range(1000):
  q = loss.backward()
  r = input_layer.backward(q,learning_rate)
  output = input_layer.forward(batch)
  loss.forward(output)
  while_loss = loss.loss(answer)
  print(while_loss)

2.725153930186623
2.6134566825351446
2.5368217039087066
2.4803757969841715
2.436399325238959
2.4004912686081536
2.3700831504480626
2.3436429525294447
2.320220151637067
2.299192851846147
2.2801306301790265
2.262719982528339
2.246722721720253
2.2319517626713097
2.218256309454733
2.2055123284801237
2.193616134968829
2.182479902880059
2.1720284176979887
2.162196664605361
2.152927997158749
2.1441727206116736
2.1358869782382053
2.128031863337652
2.1205727021066583
2.1134784677603577
2.1067212967971254
2.100276085722023
2.094120151869105
2.088232945837494
2.082595805909406
2.0771917469420393
2.0720052778215226
2.0670222427782443
2.0622296827902664
2.0576157140179423
2.0531694207713738
2.048880760951581
2.04474048225482
2.040740047708465
2.036871569332126
2.0331277489010002
2.02950182493896
2.0259875251932074
2.0225790239459647
2.019270903605482
2.016058120091881
2.0129359715954975
2.009900070338376
2.0069463170149406
2.0040708776269693
2.001270162461721
1.9985408069913535
1.9958796544971964
1

In [90]:
# model prediction
np.argmax(output, axis=1)

array([3, 1, 9, ..., 9, 8, 9])

In [91]:
print(Ytr[:, :])

[[6]
 [9]
 [9]
 ...
 [9]
 [1]
 [1]]


test

In [92]:
batch = Xte_rows
print('shape of mini batch =', batch.shape)

shape of mini batch = (10000, 3072)


In [93]:
output = input_layer.forward(batch)

In [94]:
test_answer = Yte[:,:]
print(test_answer)
test_answer = onehot(test_answer,10)

[[3]
 [8]
 [8]
 ...
 [5]
 [1]
 [7]]


In [95]:
loss_test = SoftmaxWithLoss()
loss_test.forward(output)
first_loss = loss_test.loss(test_answer)
print(first_loss)

1.7434160514105133


In [96]:
np.argmax(output, axis=1)

array([3, 9, 8, ..., 3, 5, 7])