In [17]:
import numpy as np

def relu(x):
    """Return the rectified linear unit of *x*: each element clamped below at 0."""
    lower_bound = 0
    return np.maximum(lower_bound, x)

def relu_derivative(x):
    """Return the derivative of ReLU evaluated element-wise at *x*.

    The result is 1.0 where ``x > 0`` and 0.0 elsewhere (including at
    exactly 0), as a float array of the same shape as *x*.

    ``np.greater`` is used rather than the ``>`` operator so that Python
    scalars and nested lists are accepted as well as arrays, matching the
    inputs ``relu`` accepts.
    """
    positive_mask = np.greater(x, 0)
    return positive_mask.astype(float)

def mse(y_true, y_pred):
    """Return the mean squared error between *y_true* and *y_pred*.

    The mean is taken over every element of the squared difference.
    """
    residual = y_true - y_pred
    squared_residual = residual ** 2
    return np.mean(squared_residual)

def mse_derivative(y_true, y_pred):
    """Return the gradient of the mean squared error with respect to *y_pred*.

    Each element is ``2 * (y_pred - y_true)`` divided by the total number of
    elements in *y_true*.
    """
    element_count = y_true.size
    doubled_error = 2 * (y_pred - y_true)
    return doubled_error / element_count


class AddNN:
    """Fully connected network with one ReLU hidden layer and a linear output.

    Training minimizes mean squared error with plain gradient descent, using
    the whole array passed to ``train`` on every step.
    """

    def __init__(self, input_size, hidden_size, output_size, lr):
        """Create the layer parameters.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of values produced per input row.
            lr: Learning rate applied to every gradient step.
        """
        self.lr = lr

        # Weights are standard-normal draws scaled by 0.1; biases start at 0.
        # W1 is drawn before W2, so the global NumPy RNG is consumed in
        # layer order.
        self.W1 = 0.1 * np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = 0.1 * np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run the network on *X* and return the output layer values.

        The hidden pre-activation (``z1``), hidden activation (``a1``) and
        output (``z2``) are stored on the instance for use by ``backward``.
        """
        hidden_pre = np.dot(X, self.W1) + self.b1
        self.z1 = hidden_pre

        hidden_act = relu(hidden_pre)
        self.a1 = hidden_act

        output = np.dot(hidden_act, self.W2) + self.b2
        self.z2 = output
        return output

    def backward(self, X, y, y_pred):
        """Backpropagate the MSE loss and apply one gradient-descent step.

        Relies on ``a1`` and ``z1`` cached by the most recent ``forward``
        call, so *y_pred* should be the result of ``forward(X)``.
        """
        # Output layer: the output is linear, so the loss gradient w.r.t.
        # z2 is just the loss gradient w.r.t. the prediction.
        grad_out = mse_derivative(y, y_pred)
        grad_W2 = np.dot(self.a1.T, grad_out)
        grad_b2 = np.sum(grad_out, axis=0, keepdims=True)

        # Hidden layer: push the gradient back through W2, then through ReLU.
        grad_hidden_act = np.dot(grad_out, self.W2.T)
        grad_hidden_pre = grad_hidden_act * relu_derivative(self.z1)
        grad_W1 = np.dot(X.T, grad_hidden_pre)
        grad_b1 = np.sum(grad_hidden_pre, axis=0, keepdims=True)

        # All gradients are computed before any parameter changes. The
        # in-place ``-=`` mutates the arrays held on ``self``.
        updates = (
            (self.W1, grad_W1),
            (self.b1, grad_b1),
            (self.W2, grad_W2),
            (self.b2, grad_b2),
        )
        for param, grad in updates:
            param -= self.lr * grad

    def train(self, X, y, epochs):
        """Run *epochs* gradient steps on (*X*, *y*), logging every 100th loss.

        The logged loss is measured before that epoch's parameter update.
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = mse(y, y_pred)
            self.backward(X, y, y_pred)

            if epoch % 100 != 0:
                continue
            print(f"Epoch {epoch}, Loss: {loss:.6f}")

    def predict(self, X):
        """Return the network output for *X*.

        This is a plain forward pass, so it also overwrites the cached
        ``z1``/``a1``/``z2`` values.
        """
        outputs = self.forward(X)
        return outputs


# Training data: every ordered pair (a, b) with 0 <= a, b < OPERAND_LIMIT,
# paired with the target a + b.
OPERAND_LIMIT = 100
INPUT_SCALE = float(OPERAND_LIMIT)      # maps each operand into [0, 1)
TARGET_SCALE = 2.0 * OPERAND_LIMIT      # maps each sum into [0, 1)

# Rows are ordered with `a` as the slow index and `b` as the fast index.
X = np.array(
    [[a, b] for a in range(OPERAND_LIMIT) for b in range(OPERAND_LIMIT)],
    dtype=float,
)
# Sums of small integers are exact in float64, so this matches a + b exactly.
y = X.sum(axis=1, keepdims=True)

X /= INPUT_SCALE  # normalization
y /= TARGET_SCALE

nn = AddNN(input_size=2, hidden_size=8, output_size=1, lr=0.1)
nn.train(X, y, epochs=2000)

# Keep the operands as exact integers for display. Recovering them with
# int(normalized * 100) truncates after a float round trip and can print the
# wrong operand (e.g. 29 / 100 * 100 == 28.999999999999996 -> 28).
test_inputs = [(20, 20), (33, 42), (71, 78)]
test = np.array(test_inputs, dtype=float) / INPUT_SCALE
pred = nn.predict(test) * TARGET_SCALE  # denormalize
print("\nPredictions:")
for (lhs, rhs), p in zip(test_inputs, pred):
    print(f"{lhs} + {rhs} ≈ {p[0]:.2f}")


Epoch 0, Loss: 0.311230
Epoch 100, Loss: 0.034595
Epoch 200, Loss: 0.013105
Epoch 300, Loss: 0.001288
Epoch 400, Loss: 0.000100
Epoch 500, Loss: 0.000037
Epoch 600, Loss: 0.000029
Epoch 700, Loss: 0.000024
Epoch 800, Loss: 0.000020
Epoch 900, Loss: 0.000017
Epoch 1000, Loss: 0.000015
Epoch 1100, Loss: 0.000013
Epoch 1200, Loss: 0.000011
Epoch 1300, Loss: 0.000010
Epoch 1400, Loss: 0.000009
Epoch 1500, Loss: 0.000008
Epoch 1600, Loss: 0.000007
Epoch 1700, Loss: 0.000006
Epoch 1800, Loss: 0.000006
Epoch 1900, Loss: 0.000005

Predictions:
20 + 20 ≈ 39.92
33 + 42 ≈ 74.96
71 + 78 ≈ 149.05
