<a href="https://colab.research.google.com/github/ysj9909/DL_practice_from_scratch/blob/main/Neural_Net_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import copy

In [1]:
class Two_layer_Neural_Net:
  def __init__(self, input_dim, hidden_dim, output_dim, std = 0.01):
    self.params = {}
    # Kaiming / MSRA initialization
    self.params["W1"] = np.random.randn(input_dim, hidden_dim) * std
    self.params["b1"] = np.zeros((hidden_dim))
    self.params["W2"] = np.random.randn(hidden_dim, output_dim) * std
    self.params["b2"] = np.zeros((output_dim))

    self.grads = {}
    for key in self.params.keys():
      self.grads[key] = np.zeros_like(self.params[key])

  def train(self, inputs, targets, num_epochs, lr, mode = None):
    '''
    inputs : (batch_size, input_dim)
    targets : regression problem - (batch_size, output_dim)   / classification problem - ( batch_size, )
    '''
    batch_size = inputs.shape[0]
    for epoch in range(num_epochs):
      W1, b1 = self.params["W1"], self.params["b1"]
      W2, b2 = self.params["W2"], self.params["b2"]

      z_hidden = np.dot(inputs, W1) + b1   # (batch_size, hidden_dim)
      a_hidden = np.maximum(0, z_hidden)
      outputs = np.dot(a_hidden, W2) + b2   # (batch_size, output_dim)

      if mode == "MSELoss":
        loss = np.mean((outputs - targets) ** 2 ) 
        doutputs = (outputs - targets) * (2 / batch_size)   # dL / doutputs , shape of (batch_size, output_dim)

      if mode == "CrossEntropyLoss":
        exp_outputs = np.exp(outputs)
        softmax = exp_outputs / np.sum(exp_outputs, axis = -1)
        loss = 0
        doutputs = copy.deepcopy(softmax)
        for i, idx in enumerate(targets):
          loss -= np.log(softmax[i, idx])
          doutputs[i, idx] -= 1
        loss /= batch_size
      if mode is None:
        return outputs
        break
      
      self.grads["b2"] = np.sum(doutputs , axis = 0)   # dL / db2
      self.grads["W2"] = np.dot(a_hidden.T, doutputs)   # dL / dW2
      da_hidden = np.dot(doutputs, W2.T)   # dL / da_hidden
      dz_hidden = da_hidden * (z_hidden > 0)   # dL / dz_hidden
      self.grads["b1"] = np.sum(dz_hidden, axis = 0)   # dL / db1
      self.grads["W1"] = np.dot(inputs.T, dz_hidden)   # dL / dW1

      # SGD
      for key in self.params.keys():
        self.params[key] -= lr * self.grads[key]

      print(f" Epoch [{epoch + 1} / {num_epochs}], Loss : {loss}")


In [None]:
# Case of Regression
input_dim = 10
hidden_dim = 100
output_dim = 10

model1 = Two_layer_Neural_Net(input_dim, hidden_dim, output_dim)

X = np.random.randn(128, 10)
y = np.random.randn(128, 10)

model1.train(X, y, num_epochs = 400, lr = 0.1, mode = "MSELoss")

 Epoch [1 / 400], Loss : 1.0110666562531114
 Epoch [2 / 400], Loss : 1.0073893164084526
 Epoch [3 / 400], Loss : 1.0049663751042264
 Epoch [4 / 400], Loss : 1.003331450321689
 Epoch [5 / 400], Loss : 1.0021884250115582
 Epoch [6 / 400], Loss : 1.0013490085266912
 Epoch [7 / 400], Loss : 1.0006995073582732
 Epoch [8 / 400], Loss : 1.0001623614740043
 Epoch [9 / 400], Loss : 0.999688031670382
 Epoch [10 / 400], Loss : 0.9992425192572545
 Epoch [11 / 400], Loss : 0.9987991046143595
 Epoch [12 / 400], Loss : 0.998343114048547
 Epoch [13 / 400], Loss : 0.997861473480129
 Epoch [14 / 400], Loss : 0.9973479858486499
 Epoch [15 / 400], Loss : 0.9967948379824954
 Epoch [16 / 400], Loss : 0.9961924411597443
 Epoch [17 / 400], Loss : 0.9955340635076908
 Epoch [18 / 400], Loss : 0.99480906005323
 Epoch [19 / 400], Loss : 0.9940121606493234
 Epoch [20 / 400], Loss : 0.9931364779959037
 Epoch [21 / 400], Loss : 0.9921739534465622
 Epoch [22 / 400], Loss : 0.9911146674792608
 Epoch [23 / 400], Loss :

In [6]:
# Case of Classification

input_dim = 10
hidden_dim = 100
output_dim = 10

model2 = Two_layer_Neural_Net(input_dim, hidden_dim, output_dim)

X = 10 * np.random.randn(10, 10)
y = np.random.choice(10, size = 10, replace = True)

model2.train(X, y, num_epochs = 80, lr = 0.001, mode = "CrossEntropyLoss")

 Epoch [1 / 80], Loss : 2.3070121268758887
 Epoch [2 / 80], Loss : 2.2986217251250944
 Epoch [3 / 80], Loss : 2.2903749699586906
 Epoch [4 / 80], Loss : 2.282160421993434
 Epoch [5 / 80], Loss : 2.2738990558193075
 Epoch [6 / 80], Loss : 2.265620707547874
 Epoch [7 / 80], Loss : 2.2573117138172516
 Epoch [8 / 80], Loss : 2.2488108140302834
 Epoch [9 / 80], Loss : 2.2401174017559216
 Epoch [10 / 80], Loss : 2.231233646228333
 Epoch [11 / 80], Loss : 2.222057477608038
 Epoch [12 / 80], Loss : 2.2126191712543815
 Epoch [13 / 80], Loss : 2.202874688236453
 Epoch [14 / 80], Loss : 2.1926464755872628
 Epoch [15 / 80], Loss : 2.182125328589211
 Epoch [16 / 80], Loss : 2.17109933646712
 Epoch [17 / 80], Loss : 2.1595615230493745
 Epoch [18 / 80], Loss : 2.14747239276641
 Epoch [19 / 80], Loss : 2.1347869803456803
 Epoch [20 / 80], Loss : 2.1214636963469777
 Epoch [21 / 80], Loss : 2.1074513215184085
 Epoch [22 / 80], Loss : 2.092652940447224
 Epoch [23 / 80], Loss : 2.077053888749664
 Epoch [2

In [7]:
output = model2.train(X, y, num_epochs= 1, lr = 1)
predicted = np.argmax(output, axis = -1)

In [8]:
print(predicted == y)

[ True  True  True  True  True  True  True  True  True  True]
