<a href="https://colab.research.google.com/github/yesoly/2019_cau_oss_hackathon/blob/master/Assignment9%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CODE

1. Input Data

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import torch

In [0]:
file_data   = "/mnist.csv"

# Read every CSV row up front. The context manager closes the handle even
# when reading raises, which the manual open()/close() pair did not.
with open(file_data, "r") as handle_file:
    data = handle_file.readlines()

size_row    = 28    # height of the image
size_col    = 28    # width of the image

num_image   = len(data)
count       = 0     # count for the number of images

#
# normalize the values of the input data to be [0, 1]
#
def normalize(data):
    """Linearly rescale ``data`` so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    data : array_like
        Numeric values (for example the pixel intensities of one image).

    Returns
    -------
    numpy.ndarray
        Float array of the same shape with values in [0, 1]. A constant input
        (max == min) yields all zeros instead of the NaNs produced by 0 / 0,
        and an empty input is returned unchanged.
    """
    data = np.asarray(data, dtype=float)

    if data.size == 0:
        return(data)

    # Compute the extrema once with the vectorised reductions.
    lowest = data.min()
    span = data.max() - lowest

    if span == 0:
        return(np.zeros_like(data))

    data_normalized = (data - lowest) / span

    return(data_normalized)

#
# example of distance function between two vectors x and y
#
def distance(x, y):
    """Return the sum of squared element-wise differences between x and y.

    No square root is taken, so the result is the squared Euclidean distance.
    """
    delta = x - y
    squared_total = np.sum(delta * delta)

    return(squared_total)

#
# make a matrix, each column of which represents an image in vector form
#
list_image  = np.empty((size_row * size_col, num_image), dtype=float)
list_label  = np.empty(num_image, dtype=int)

# Each CSV row is "<label>,<pixel>,<pixel>,..."; store the label and the
# normalized pixel vector in column `index`.
for index, line in enumerate(data):

    line_data   = line.split(',')
    label       = int(line_data[0])
    # np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float) is
    # the equivalent conversion and works on old and new releases alike.
    im_vector   = np.asarray(line_data[1:], dtype=float)
    im_vector   = normalize(im_vector)

    list_label[index]       = label
    list_image[:, index]    = im_vector

# keep the running counter consistent with the number of rows processed
count = num_image

#
# plot first 150 images out of 10,000 with their labels
#
f1 = plt.figure(1)

# Preview at most 150 images; clamping to num_image avoids an IndexError
# when fewer images were loaded.
num_preview = min(150, num_image)

for i in range(num_preview):

    label       = list_label[i]
    im_vector   = list_image[:, i]
    im_matrix   = im_vector.reshape((size_row, size_col))

    # 10 x 15 grid of thumbnails, each titled with its label
    frame = plt.subplot(10, 15, i+1)
    frame.set_title(label)
    frame.imshow(im_matrix, cmap='Greys', interpolation='None')

    # hide the tick marks: they carry no information for image thumbnails
    frame.get_xaxis().set_visible(False)
    frame.get_yaxis().set_visible(False)


#plt.show()

#
# plot the average image of all the images for each digit
#
f2 = plt.figure(2)

im_average  = np.zeros((size_row * size_col, 10), dtype=float)
im_count    = np.zeros(10, dtype=int)

# accumulate the pixel sums and the number of images seen for every digit
for i in range(num_image):

    digit = list_label[i]
    im_average[:, digit] = im_average[:, digit] + list_image[:, i]
    im_count[digit] = im_count[digit] + 1

# turn each sum into a mean and draw it in a 2 x 5 grid
for i in range(10):

    im_average[:, i] = im_average[:, i] / im_count[i]
    mean_image = im_average[:, i].reshape((size_row, size_col))

    plt.subplot(2, 5, i+1)
    plt.title(i)
    plt.imshow(mean_image, cmap='Greys', interpolation='None')

    frame   = plt.gca()
    for axis in (frame.axes.get_xaxis(), frame.axes.get_yaxis()):
        axis.set_visible(False)

plt.show()

2. Sigmoid Function as an activation function

In [0]:
def sigmoid_func(z):
  """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise."""
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator

def sigmoid_grad(z):
  """Derivative of the logistic sigmoid at z: s(z) * (1 - s(z))."""
  activation = 1.0 / (1.0 + np.exp(-z))
  complement = 1 - activation
  return activation * complement

3. Objective Function

In [0]:
def cost(y_enc, predict):
  """Mean binary cross-entropy between one-hot targets and predictions.

  Parameters
  ----------
  y_enc : array_like
      Target values (one-hot encoded), same shape as ``predict``.
  predict : array_like
      Predicted probabilities; the last axis indexes the samples.

  Returns
  -------
  float
      Cross-entropy summed over all entries and divided by the number of
      samples (size of the last axis of ``predict``).
  """
  y_enc = np.asarray(y_enc, dtype=float)
  predict = np.asarray(predict, dtype=float)

  # Keep probabilities strictly inside (0, 1) so a saturated sigmoid cannot
  # produce log(0) and turn the cost into inf/NaN.
  eps = 1e-12
  clipped = np.clip(predict, eps, 1.0 - eps)

  # Use the y_enc argument here (not a global label variable).
  losses = -y_enc * np.log(clipped) - (1 - y_enc) * np.log(1 - clipped)

  # Average per sample: samples are the last axis, so len() of a 2-D
  # (class, sample) array would count the classes instead.
  num_sample = predict.shape[-1] if predict.ndim else 1
  return np.sum(losses) / num_sample

def random_theta():
  """Draw random initial weights for the three layers of the network.

  Every matrix is laid out as (outputs, inputs + 1) so that it can be applied
  as ``theta.dot(activation_with_bias_row)``; the extra input column is the
  bias weight.

  Returns
  -------
  tuple of numpy.ndarray
      ``(theta_u, theta_v, theta_w)`` with shapes (196, 785), (49, 197) and
      (10, 50), filled with standard-normal samples.
  """
  theta_u = np.random.randn(196, 785)  # 784 pixels + bias -> 196 units
  theta_v = np.random.randn(49, 197)   # 196 units  + bias -> 49 units
  theta_w = np.random.randn(10, 50)    # 49 units   + bias -> 10 classes
  return theta_u, theta_v, theta_w

4. Gradient Descent (Back-propagation)

In [0]:
def func_calculate(image, u, v, w):
  """Forward pass through the three sigmoid layers.

  ``image`` is a 2-D array with one sample per column. Before each layer a
  row of ones (the bias input) is placed on top of the activations.

  Returns ``(x, y, y_, z, z_, h)``: ``x``, ``y`` and ``z`` are the layer
  inputs including their bias row, ``y_`` and ``z_`` are the pre-activations
  of the first and second layer, and ``h`` is the output of the last layer.
  """
  def with_bias(values):
    # stack a row of ones (bias input) on top of the given 2-D array
    return np.vstack((np.ones((1, values.shape[1])), values))

  # layer 1
  x = with_bias(image)
  y_ = u.dot(x)

  # layer 2
  y = with_bias(sigmoid_func(y_))
  z_ = v.dot(y)

  # output layer
  z = with_bias(sigmoid_func(z_))
  h = sigmoid_func(w.dot(z))

  return x, y, y_, z, z_, h

def gradient(x, y, z, h, y_, z_, y_enc, u, v, w):
  """Back-propagate the cross-entropy error through the three sigmoid layers.

  Parameters
  ----------
  x, y, z : numpy.ndarray
      Inputs of layers 1, 2 and 3, each with a leading bias row of ones and
      one column per sample (the values returned by ``func_calculate``).
  h : numpy.ndarray
      Network output, shape (classes, samples).
  y_, z_ : numpy.ndarray
      Pre-activations of layers 1 and 2. They are kept in the signature for
      compatibility; the sigmoid derivative is obtained from the activations
      ``y`` and ``z`` instead, since sigmoid'(a) = s(a) * (1 - s(a)).
  y_enc : numpy.ndarray
      One-hot targets with the same shape as ``h``.
  u, v, w : numpy.ndarray
      Weights of layers 1, 2 and 3, each shaped (outputs, inputs + 1).
      ``u`` is accepted for symmetry but not needed by the computation.

  Returns
  -------
  tuple of numpy.ndarray
      ``(grad_u, grad_v, grad_w)``, the gradients of the summed cross-entropy
      with respect to ``u``, ``v`` and ``w`` (same shapes as the weights).
  """
  # For a sigmoid output with cross-entropy loss the output error is h - t.
  delta_h = h - y_enc

  # Propagate through w, drop the bias row (it has no incoming weights) and
  # scale by the sigmoid derivative of the second hidden layer.
  act_z = z[1:, :]
  delta_z = w.T.dot(delta_h)[1:, :] * act_z * (1.0 - act_z)

  # Same step for the first hidden layer.
  act_y = y[1:, :]
  delta_y = v.T.dot(delta_z)[1:, :] * act_y * (1.0 - act_y)

  # Each gradient is (error of the layer) x (input of the layer, transposed).
  grad_u = delta_y.dot(x.T)
  grad_v = delta_z.dot(y.T)
  grad_w = delta_h.dot(z.T)

  return grad_u, grad_v, grad_w

In [0]:
def accuracy_train(image, label, predict):
  """Fraction of samples whose predicted class equals the true label.

  Parameters
  ----------
  image : numpy.ndarray
      Unused; kept so existing callers do not have to change.
  label : array_like
      True class of every sample.
  predict : array_like
      Predicted class of every sample, same length as ``label``.

  Returns
  -------
  float
      Number of matches divided by the number of samples (0.0 when there
      are no samples).
  """
  label = np.asarray(label)
  predict = np.asarray(predict)

  # Divide by the number of samples; image.shape[0] is the pixel count.
  total = label.shape[0]
  if total == 0:
    return 0.0

  match = np.sum(label == predict, axis = 0)
  return match/total

- Training

In [0]:
def training(lr, epoch, image, label, max_epoch=None):
  """Train the three-layer network with full-batch gradient descent.

  Parameters
  ----------
  lr : float
      Base learning rate. The step used at a given epoch is
      ``lr / (1 + 0.00001 * epoch)``.
  epoch : int
      Starting value of the epoch counter.
  image : numpy.ndarray
      Training images, one column per sample.
  label : numpy.ndarray
      Integer class (0-9) of every training image.
  max_epoch : int, optional
      Stop once the epoch counter reaches this value. ``None`` (default)
      means the loop only ends on convergence or a non-finite cost.

  Returns
  -------
  J_list : list
      Cost measured at every iteration.
  accuracy_list : list
      Training accuracy measured at every iteration.
  theta_list : list of list
      ``theta_list[0]``, ``[1]`` and ``[2]`` hold snapshots (copies) of
      ``u``, ``v`` and ``w``. A snapshot is taken at every logged epoch and
      once more when training stops, so ``theta_list[k][-1]`` is the final
      value. Storing one snapshot per iteration would need more than a
      megabyte per epoch for ``u`` alone.
  """
  cost_converge_value = 1e-7  # convergence threshold on the change in cost

  # One-hot encode the labels: one row per digit, one column per sample.
  num_sample = label.shape[0]
  y_enc = np.zeros((10, num_sample))
  y_enc[label, np.arange(num_sample)] = 1.0
  u, v, w = random_theta()

  # Save data
  J_list = []
  accuracy_list = []
  theta_list = [[], [], []]
  previous_J = None

  while (True) :
    x, y, y_, z, z_, h = func_calculate(image, u, v, w)
    # The predicted digit is the strongest unit of the output layer h.
    predict = np.argmax(h, axis = 0)
    J = cost(y_enc, h)
    accuracy = accuracy_train(image, label, predict)
    J_list.append(J)
    accuracy_list.append(accuracy)

    # Stop when the cost no longer changes, when the optional epoch budget
    # is used up, or when the cost became inf/NaN (it could never converge).
    converged = previous_J is not None and abs(previous_J - J) < cost_converge_value
    exhausted = max_epoch is not None and epoch >= max_epoch
    if converged or exhausted or not np.isfinite(J):
      break
    previous_J = J

    # Decay from the base rate; dividing lr in place would compound the
    # decay at every iteration and shrink the step to nothing.
    step = lr / (1 + 0.00001 * epoch)
    grad_u, grad_v, grad_w = gradient(x, y, z, h, y_, z_, y_enc, u, v, w)

    u -= step * grad_u
    v -= step * grad_v
    w -= step * grad_w
    epoch += 1

    # Print Log
    if epoch % 1000 == 0:
     print('Epoch: {:6d}, cost: {:10f}, accuracy: {:10f}'.format(epoch, J, accuracy))
     # copy: u, v and w are updated in place on the next iteration
     for history, theta in zip(theta_list, (u, v, w)):
       history.append(theta.copy())

  # final parameters, matching the last recorded cost and accuracy
  for history, theta in zip(theta_list, (u, v, w)):
    history.append(theta.copy())

  return J_list, accuracy_list, theta_list

- Testing

In [0]:
def testing(image, label, u, v, w):
  """Evaluate trained weights on a labelled set of images.

  Parameters
  ----------
  image : numpy.ndarray
      Images to classify, one column per sample.
  label : numpy.ndarray
      True class of every image.
  u, v, w : numpy.ndarray
      Weights of the three layers.

  Returns
  -------
  float
      Accuracy reported by ``accuracy_train`` for the predictions.
  """
  # One forward pass classifies every column at once; func_calculate
  # returns six values and the last one is the network output h.
  x, y, y_, z, z_, h = func_calculate(image, u, v, w)
  predict = np.argmax(h, axis = 0)
  accuracy = accuracy_train(image, label, predict)
  return accuracy

In [0]:
learning_rate = 0.001
epoch = 0
# Columns [0, num_train) are used for training and the remaining columns for
# testing. NOTE(review): 5999 is the original split point - confirm whether
# 6000 was intended.
num_train = 5999

print("=========================== Training Start ===========================")
train_cost, train_accuracy, train_theta = training(learning_rate, epoch, list_image[:, :num_train], list_label[:num_train])
# train_theta holds one list of snapshots per weight matrix; the last entry
# of each list is the most recent value.
final_u = train_theta[0][-1]
final_v = train_theta[1][-1]
final_w = train_theta[2][-1]
print("=========================== Testing Start ===========================")
# Evaluate on the held-out columns of the data set.
testing(list_image[:, num_train:], list_label[num_train:], final_u, final_v, final_w)

# Submission

1. Plot the loss curve

In [0]:
# The per-iteration cost returned by training() is stored in train_cost;
# J_list only exists inside that function.
plt.title("Loss Curve")
plt.xlabel("iteration")
plt.ylabel("cost")
# x must have one value per cost entry, so plot against the iteration index.
# The curve is drawn once; the original plotted the same series twice.
plt.plot(range(len(train_cost)), train_cost, c = 'b')
plt.show()

2. Plot the accuracy curve

In [0]:
# NOTE(review): result_theta and average_digit are not defined in the visible
# part of this notebook - confirm where they come from before running.
final_W = result_theta
avg = average_digit(final_W)

# one row per digit: the ten values avg[i, 0..9] with three decimals
row_format = ': %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f'
for i in range(10):
  print('digit ', i, row_format % tuple(avg[i, j] for j in range(10)))

3. Plot the accuracy value

4. Plot the classification example