<a href="https://colab.research.google.com/github/zjkang/machine-learning-coding/blob/main/MLP_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Refer to [Feedforward NN](https://github.com/zjkang/Machine-Learning-Interviews/blob/main/src/MLC/notebooks/feedforward.ipynb)

实现了2种方式
* 2层神经网络,relu activation, gradient descent, 基于实现,尝试了几点改进
    - Weight initialization(李沐《动手学深度学习》中讲了 Xavier 初始化)
    - Learning rate decay
    - Regularization
    - Mini-batch training
    - Optimization algorithm: Adam, Adagrad, and RMSprop

* 扩展版
    - 多层网络
    - 激活函数
    - loss function

In [None]:
import numpy as np

class TwoLayerNet:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Parameters are stored in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``. Training is full-batch gradient descent
    on the cross-entropy loss.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Draw both weight matrices from a standard normal; zero the biases.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of classes (columns of ``W2``).
        """
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def _hidden_and_logits(self, X):
        """Return ``(a1, z2)``: hidden ReLU activations and output logits."""
        z1 = np.dot(X, self.params['W1']) + self.params['b1']
        a1 = np.maximum(0, z1)  # ReLU
        z2 = np.dot(a1, self.params['W2']) + self.params['b2']
        return a1, z2

    @staticmethod
    def _softmax(logits):
        """Row-wise softmax of a 2-D array of logits.

        Each row is shifted by its maximum before exponentiating. The shift
        cancels in the ratio, so the result is mathematically unchanged, but
        the largest exponent becomes 0 and ``np.exp`` can no longer overflow
        to ``inf`` (which would turn the probabilities into ``nan``).
        """
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X):
        """Return the class probabilities for every row of ``X``.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            Array with one row per sample and one column per class; each row
            sums to 1.
        """
        _, z2 = self._hidden_and_logits(X)
        return self._softmax(z2)

    def loss(self, X, y):
        """Return the mean cross-entropy of the predictions for ``X``.

        Args:
            X: 2-D array with one sample per row.
            y: Integer class index for each row of ``X``.

        Returns:
            The negative log-probability of the correct class, averaged over
            the rows of ``X``.
        """
        _, z2 = self._hidden_and_logits(X)
        # Log-softmax via log-sum-exp: taking the log of an already computed
        # probability would give -log(0) = inf once that probability
        # underflows, whereas this form stays finite.
        shifted = z2 - np.max(z2, axis=1, keepdims=True)
        log_probs = shifted - np.log(
            np.sum(np.exp(shifted), axis=1, keepdims=True)
        )
        correct_logprobs = -log_probs[np.arange(len(X)), y]
        data_loss = np.sum(correct_logprobs)
        return 1.0 / len(X) * data_loss

    def train(self, X, y, num_epochs, learning_rate=0.1):
        """Fit the parameters in place with full-batch gradient descent.

        Prints the loss every 100 epochs (including epoch 0).

        Note that the gradients are summed over the rows of ``X`` rather than
        averaged, although ``loss`` reports the mean; the effective step size
        therefore grows with the number of rows.

        Args:
            X: 2-D array with one sample per row.
            y: Integer class index for each row of ``X``.
            num_epochs: Number of parameter updates to perform.
            learning_rate: Step size applied to the summed gradients.
        """
        rows = np.arange(len(X))
        for epoch in range(num_epochs):
            # Forward propagation
            a1, probs_input = self._hidden_and_logits(X)
            probs = self._softmax(probs_input)

            # Backpropagation. For softmax + cross-entropy the gradient with
            # respect to the logits is (probs - one_hot(y)); see
            # https://zhuanlan.zhihu.com/p/374921739
            delta3 = probs  # freshly allocated above, safe to modify in place
            delta3[rows, y] -= 1
            dW2 = np.dot(a1.T, delta3)
            db2 = np.sum(delta3, axis=0)
            # ReLU derivative: gradient flows only through active units
            delta2 = np.dot(delta3, self.params['W2'].T) * (a1 > 0)
            dW1 = np.dot(X.T, delta2)
            db1 = np.sum(delta2, axis=0)

            # Update parameters
            self.params['W1'] -= learning_rate * dW1
            self.params['b1'] -= learning_rate * db1
            self.params['W2'] -= learning_rate * dW2
            self.params['b2'] -= learning_rate * db2

            # Print loss for monitoring training progress
            if epoch % 100 == 0:
                loss = self.loss(X, y)
                print("Epoch {}: loss = {}".format(epoch, loss))

In [None]:
# XOR truth table: the four binary input pairs and their exclusive-or labels
X = np.array([(a, b) for a in (0, 1) for b in (0, 1)])
y = X[:, 0] ^ X[:, 1]

# Build the classifier: 2 inputs -> 10 hidden units -> 2 classes
net = TwoLayerNet(2, 10, 2)

# Fit on the full dataset with the default learning rate
net.train(X, y, num_epochs=1000)

# Evaluate on the training points: take the most probable class per row
probs = net.forward(X)
predictions = probs.argmax(axis=1)
print("Predictions: ", predictions)

Epoch 0: loss = 0.8303842914291193
Epoch 100: loss = 0.04766938276333535
Epoch 200: loss = 0.019828067291249734
Epoch 300: loss = 0.011858685673936456
Epoch 400: loss = 0.008334605467611186
Epoch 500: loss = 0.0063652485018918245
Epoch 600: loss = 0.00511881633097276
Epoch 700: loss = 0.004270173857606601
Epoch 800: loss = 0.0036564149465816496
Epoch 900: loss = 0.0031874020635375548
Predictions:  [0 1 1 0]
