<a href="https://colab.research.google.com/github/yesoly/2019_cau_oss_hackathon/blob/master/Assignment_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CODE

1. Input Data

In [0]:
import matplotlib.pyplot as plt
import numpy as np

In [0]:
file_data   = "/mnist.csv"

# read all lines up front; the context manager closes the file even if reading fails
with open(file_data, "r") as handle_file:
    data = handle_file.readlines()

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
count       = 0     # count for the number of images

#
# normalize the values of the input data to be [0, 1]
#
def normalize(data):
    """Linearly rescale the values of `data` so that they span [0, 1].

    Parameters
    ----------
    data : array-like of numbers

    Returns
    -------
    numpy.ndarray of floats with the same shape as `data`. When every value
    is identical (max == min) an all-zero array is returned instead of the
    NaNs a 0/0 division would produce.
    """
    data = np.asarray(data, dtype=float)

    lowest = data.min()
    span = data.max() - lowest

    # a constant input has no range to stretch; avoid dividing by zero
    if span == 0:
        return np.zeros_like(data)

    return (data - lowest) / span

#
# example of distance function between two vectors x and y
#
def distance(x, y):
    """Return the sum of squared element-wise differences between x and y.

    No square root is taken, so the result is the squared Euclidean distance.
    """
    return np.sum(np.square(x - y))

#
# make a matrix in which each column holds one image in vector form
#
list_image  = np.empty((size_row * size_col, num_image), dtype=float)
list_label  = np.empty(num_image, dtype=int)

for line in data:

    # first field is the label, the remaining fields are the pixel values;
    # strip the trailing newline before splitting
    line_data   = line.strip().split(',')
    label       = int(line_data[0])
    # np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is equivalent
    im_vector   = np.asarray(line_data[1:], dtype=float)
    im_vector   = normalize(im_vector)

    list_label[count]       = label
    list_image[:, count]    = im_vector

    count += 1

#
# plot first 150 images out of 10,000 with their labels
#
f1 = plt.figure(1)

for i in range(150):

    # one panel per image on a 10 x 15 grid
    frame       = plt.subplot(10, 15, i+1)

    label       = list_label[i]
    im_vector   = list_image[:, i]
    im_matrix   = np.reshape(im_vector, (size_row, size_col))

    frame.set_title(label)
    plt.imshow(im_matrix, cmap='Greys', interpolation='None')

    # hide both axes so only the digit and its label are visible
    frame.xaxis.set_visible(False)
    frame.yaxis.set_visible(False)


#plt.show()

#
# plot the average image of all the images for each digit
#
f2 = plt.figure(2)

im_average  = np.zeros((size_row * size_col, 10), dtype=float)
im_count    = np.zeros(10, dtype=int)

# accumulate, per digit, the pixel-wise sum and the number of images seen
for i in range(num_image):

    im_average[:, list_label[i]] += list_image[:, i]
    im_count[list_label[i]] += 1

for i in range(10):

    # a digit that never occurs keeps its all-zero column instead of becoming 0/0 = NaN
    if im_count[i] > 0:
        im_average[:, i] /= im_count[i]

    plt.subplot(2, 5, i+1)
    plt.title(i)
    plt.imshow(im_average[:,i].reshape((size_row, size_col)), cmap='Greys', interpolation='None')

    frame   = plt.gca()
    frame.axes.get_xaxis().set_visible(False)
    frame.axes.get_yaxis().set_visible(False)

plt.show()

2. Sigmoid Function as an activation function

In [0]:
def sigmoid_func(z):
  """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise to z."""
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator

def sigmoid_grad(z):
  """Derivative of the logistic sigmoid at z: s(z) * (1 - s(z)), element-wise."""
  activation = 1.0 / (1.0 + np.exp(-z))
  complement = 1 - activation
  return activation * complement

3. Objective Function

The network has one hidden layer (hidden layer 1개).

In [0]:
def L2_norm(lamb, u, v):
  """Regularization term computed from the squared entries of u and v.

  Column 0 of both matrices is left out of the sums. The total is scaled by
  lamb / (len(u) + len(v)).
  """
  # NOTE(review): len() counts rows, so the divisor is rows(u) + rows(v) rather
  # than the total number of weights - confirm this is the intended scaling
  n_rows = len(u) + len(v)
  squared_u = np.sum(np.square(u[:, 1:]))
  squared_v = np.sum(np.square(v[:, 1:]))
  return (lamb / n_rows) * (squared_u + squared_v)

def cost(y_enc, predict, L2):
  """Cross-entropy between targets and predictions, averaged per sample, plus L2.

  Parameters
  ----------
  y_enc   : one-hot targets, one column per sample
  predict : predicted probabilities, same shape as y_enc
  L2      : regularization term added to the averaged cross-entropy

  The sum is divided by the number of samples (the last axis of `predict`),
  so costs computed on data sets of different sizes are comparable. For a
  1-D `predict` this equals len(predict).
  """
  # keep predictions strictly inside (0, 1) so that log() never sees 0
  eps = 1e-12
  prob = np.clip(predict, eps, 1.0 - eps)

  n_sample = np.shape(predict)[-1]
  cross_entropy = -y_enc * np.log(prob) - (1 - y_enc) * np.log(1 - prob)
  return np.sum(cross_entropy) / n_sample + L2

def random_theta(n_input=784, n_hidden=196, n_output=10):
  """Draw initial weights for both layers from a standard normal distribution.

  Parameters
  ----------
  n_input  : number of input features (without the bias)
  n_hidden : number of hidden units (without the bias)
  n_output : number of output units

  Returns
  -------
  theta_u : array of shape (n_hidden, n_input + 1)
  theta_v : array of shape (n_output, n_hidden + 1)
  The extra column in each matrix is for the bias term.
  """
  theta_u = np.random.randn(n_hidden, n_input + 1)
  theta_v = np.random.randn(n_output, n_hidden + 1)
  return theta_u, theta_v

4. Gradient Descent (Back-propagation)

In [0]:
def func_calculate(image, u, v):
  """Forward pass through both layers for all columns of `image`.

  A row of ones (the bias input) is placed on top of the input and on top of
  the hidden activations before each matrix product.

  Returns
  -------
  x  : `image` with the bias row prepended
  y  : hidden activations with the bias row prepended
  y_ : hidden pre-activations, u.dot(x)
  z  : output activations, the sigmoid of z_
  z_ : output pre-activations, v.dot(y)
  """
  bias_row = np.ones((1, image.shape[1]))

  # input layer -> hidden layer
  x = np.vstack((bias_row, image))
  y_ = u.dot(x)

  # hidden layer -> output layer
  y = np.vstack((bias_row, sigmoid_func(y_)))
  z_ = v.dot(y)
  z = sigmoid_func(z_)

  return x, y, y_, z, z_

def gradient(x, y, z, y_, z_, y_enc, u, v):
  """Back-propagate the output error and return the gradients for u and v.

  Parameters
  ----------
  x, y   : layer inputs including their bias row
  z      : output activations
  y_     : hidden pre-activations (without a bias row)
  y_enc  : targets, same shape as z
  v      : output-layer weights
  z_, u  : accepted but not used here

  Returns
  -------
  grad_u, grad_v : gradients summed over all columns (samples). No
  regularization term is included.
  """
  delta_z = z - y_enc

  # push the error back through v and drop row 0, which belongs to the bias unit
  back_hidden = v.T.dot(delta_z)[1:, :]
  delta_y = back_hidden * sigmoid_grad(y_)

  grad_u = delta_y.dot(x.T)
  grad_v = delta_z.dot(y.T)

  return grad_u, grad_v

5. Accuracy Function

In [0]:
def accuracy_train(image, label, predict):
  """Fraction of entries in `predict` that equal `label`.

  `image` is accepted but not used.
  """
  is_hit = (label == predict)
  n_hit = np.sum(is_hit, axis = 0)
  n_total = len(label)
  return n_hit / n_total

In [0]:
def find_fail_img(image, label, predict):
  """Show up to 10 images whose predicted digit differs from the label.

  Parameters
  ----------
  image   : array with one 28*28 image per column
  label   : true digit for each column
  predict : predicted digit for each column

  If fewer than 10 misclassified images exist, only those are drawn and the
  remaining panels stay empty.
  """
  fail_img = []
  fail_digit = []
  for idx in range(len(label)):
    if len(fail_img) == 10:
      break
    if label[idx] != predict[idx]:
      fail_img.append(image[:, idx])
      # store a plain int so the %d formatting below does not rely on
      # converting a 1-element array to a scalar
      fail_digit.append(int(predict[idx]))

  fig, ax = plt.subplots(nrows = 2, ncols = 5, sharex= True, sharey= True, )
  ax = ax.ravel()
  # iterate over what was actually collected, not a fixed count of 10
  for i, (img, digit) in enumerate(zip(fail_img, fail_digit)):
    ax[i].imshow(img.reshape((28, 28)), cmap='Greys', interpolation='None')
    ax[i].set_title("predict: %d" % digit)

  # axes are shared, so clearing the ticks on one panel clears them on all
  ax[0].set_xticks([])
  ax[0].set_yticks([])
  plt.tight_layout()
  plt.show()

def find_success_img(image, label, predict):
  """Show up to 10 images whose predicted digit equals the label.

  Parameters
  ----------
  image   : array with one 28*28 image per column
  label   : true digit for each column
  predict : predicted digit for each column

  If fewer than 10 correctly classified images exist, only those are drawn
  and the remaining panels stay empty.
  """
  success_img = []
  success_digit = []
  for idx in range(len(label)):
    if len(success_img) == 10:
      break
    if label[idx] == predict[idx]:
      success_img.append(image[:, idx])
      # store a plain int so the %d formatting below does not rely on
      # converting a 1-element array to a scalar
      success_digit.append(int(predict[idx]))

  fig, ax = plt.subplots(nrows = 2, ncols = 5, sharex= True, sharey= True, )
  ax = ax.ravel()
  # iterate over what was actually collected, not a fixed count of 10
  for i, (img, digit) in enumerate(zip(success_img, success_digit)):
    ax[i].imshow(img.reshape((28, 28)), cmap='Greys', interpolation='None')
    ax[i].set_title("predict: %d" % digit)

  # axes are shared, so clearing the ticks on one panel clears them on all
  ax[0].set_xticks([])
  ax[0].set_yticks([])
  plt.tight_layout()
  plt.show()

- Training

In [0]:
def training(lr, epoch, lamb, max_epoch=20000):
  """Train the two-layer network with full-batch gradient descent.

  The first 1000 columns of the global `list_image` / `list_label` are the
  training set and the remaining columns are the test set.

  Parameters
  ----------
  lr        : learning rate applied to both weight matrices
  epoch     : starting value of the iteration counter
  lamb      : regularization weight passed to L2_norm (used for the cost only)
  max_epoch : the loop stops once the counter reaches this value

  Returns
  -------
  train_J_list, train_accuracy_list, test_J_list, test_accuracy_list :
      one entry per iteration, each measured before that iteration's update
  test_image_predict :
      predicted digits for the test set from the last iteration
  """
  # use a single boundary so that no sample falls between the two splits
  n_train = 1000
  train_image = list_image[:, :n_train]
  train_label = list_label[:n_train]
  test_image = list_image[:, n_train:]
  test_label = list_label[n_train:]

  # one-hot targets: one row per digit, one column per sample
  train_y = np.zeros((10, train_label.shape[0]))
  train_y[train_label, np.arange(train_label.shape[0])] = 1.0

  test_y = np.zeros((10, test_label.shape[0]))
  test_y[test_label, np.arange(test_label.shape[0])] = 1.0

  u, v = random_theta()

  # Save data
  train_J_list = []
  train_accuracy_list = []

  test_J_list = []
  test_accuracy_list = []

  while True:
    x, y, y_, z, z_ = func_calculate(train_image, u, v)
    t_x, t_y, t_y_, t_z, t_z_ = func_calculate(test_image, u, v)

    # the predicted digit is the output unit with the largest pre-activation
    train_image_predict = np.argmax(z_, axis = 0)
    test_image_predict = np.argmax(t_z_, axis = 0)
    l2 = L2_norm(lamb, u, v)
    train_J = cost(train_y, z, l2)
    test_J = cost(test_y, t_z, l2)

    grad_u, grad_v = gradient(x, y, z, y_, z_, train_y, u, v)

    u -= lr * grad_u
    v -= lr * grad_v

    train_accuracy = accuracy_train(train_image, train_label, train_image_predict)
    test_accuracy = accuracy_train(test_image, test_label, test_image_predict)

    epoch += 1

    train_J_list.append(train_J)
    test_J_list.append(test_J)
    train_accuracy_list.append(train_accuracy)
    test_accuracy_list.append(test_accuracy)

    # Print Log
    if epoch % 1000 == 0:
      print('Epoch: {:6d}, train_cost: {:10f}, train_accuracy: {:10f} / test_cost: {:10f}, test_accuracy: {:10f}'.format(epoch, train_J, train_accuracy, test_J, test_accuracy))

    # stop on the iteration count; >= also terminates when the starting
    # counter is already past the limit
    if epoch >= max_epoch:
      print("cost is converged")
      break

  return train_J_list, train_accuracy_list, test_J_list, test_accuracy_list, test_image_predict

In [0]:
learning_rate = 0.001
epoch = 0
lamb = 0.05

# fix the seed so the random initial weights, and therefore every curve and
# number below, are reproducible on a fresh kernel
np.random.seed(0)

print("=========================== Training Start ===========================")
train_J_list, train_accuracy_list, test_J_list, test_accuracy_list, test_image_predict = training(learning_rate, epoch, lamb)

# Submission

1. Plot the loss curve

In [0]:
# training (blue) and testing (red) cost per iteration on one set of axes
fig_loss, ax_loss = plt.subplots()
ax_loss.set_title("Loss Curve")
ax_loss.set_xlabel("iteration")
ax_loss.set_ylabel("J")
ax_loss.plot(range(len(train_J_list)), train_J_list, c="b", label="training")
ax_loss.plot(range(len(test_J_list)), test_J_list, c="r", label="testing")
ax_loss.legend(loc='upper right')
plt.show()




2. Plot the accuracy curve

In [0]:
# training (blue) and testing (red) accuracy per iteration on one set of axes
fig_acc, ax_acc = plt.subplots()
ax_acc.set_title("Accuracy Curve")
ax_acc.set_xlabel("iteration")
ax_acc.set_ylabel("accuracy")
ax_acc.plot(range(len(train_accuracy_list)), train_accuracy_list, c="b", label="training")
ax_acc.plot(range(len(test_accuracy_list)), test_accuracy_list, c="r", label="testing")
ax_acc.legend(loc='lower right')
plt.show()

3. Plot the accuracy value

In [0]:
# report the accuracy recorded at the last iteration, as a percentage
print('Final Training Accuracy: %s%%' %(train_accuracy_list[-1] * 100))
print('Final Testing Accuracy: %s%%' %(test_accuracy_list[-1] * 100))

4. Plot the classification example

In [0]:
# the test split is everything from column 1000 onwards
test_label = list_label[1000:]
test_img = list_image[:, 1000:]

print("=========================== Success Image ===========================")
find_success_img(test_img, test_label, test_image_predict)

print("=========================== Failed Image ===========================")
find_fail_img(test_img, test_label, test_image_predict)