In [2]:
import tensorflow as tf
import numpy as np
from keras.layers import Dense, Flatten
from keras.models import Sequential

inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
outputs = np.array([[0],[1],[1],[0]])

tf.random.set_seed(42)

xor_model = Sequential()
xor_model.add(Flatten(input_shape=(2,)))             #input layer
xor_model.add(Dense(4, activation="relu"))           #hidden layer
xor_model.add(Dense(1, activation="sigmoid"))        #Output layer


xor_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy'],
)

xor_model.fit(inputs, outputs, epochs=50, batch_size=2, verbose=1)

print(np.round(xor_model.predict(np.array([[0,1]]))))

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.5000 - loss: 0.7478 
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.2500 - loss: 0.7363    
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.2500 - loss: 0.7264    
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2500 - loss: 0.7170    
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.2500 - loss: 0.7081    
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2500 - loss: 0.7008    
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.2500 - loss: 0.6945    
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.2500 - loss: 0.6882    
Epoch 9/50
[1m2/2[0m [32m━━━━━━━

In [2]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_derivative(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))
    
    def forward(self, X):
        self.z1 = np.dot(X,self.W1)+self.b1
        self.a1 = np.maximum(0, self.z1)

        self.z2 = np.dot(self.a1, self.W2)
        self.a2 = np.maximum(0, self.z2)

        return self.a2
    
    def backward(self, X, y, lr=0.01):
        m = X.shape[0]

        dz2 = self.a2 - y.reshape(-1,1)
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer gradient (ReLU derivative)
        dz1 = np.dot(dz2, self.W2.T) * (self.z1 > 0)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights
        self.W1 -= lr * dW1
        self.b1 -= lr * db1
        self.W2 -= lr * dW2
        self.b2 -= lr * db2

    def train(self, X, y, epochs=1000, lr=0.1):
        for epoch in range(epochs):
            # Forward and backward pass
            self.forward(X)
            self.backward(X, y, lr)

            # Print loss every 100 epochs
            if epoch % 100 == 0:
                loss = -np.mean(y * np.log(self.a2 + 1e-8) + (1 - y) * np.log(1 - self.a2 + 1e-8))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X, threshold=0.5):
        proba = self.forward(X)
        return (proba >= threshold).astype(int)
    

    # 1. Data Preparation (XOR problem)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])

# 2. Initialize and train the network
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
nn.train(X, y, epochs=1000, lr=0.1)

# 3. Make predictions
predictions = nn.predict(X)
print("\nFinal Predictions:")
for i in range(len(X)):
    print(f"Input: {X[i]}, Predicted: {predictions[i][0]}, Actual: {y[i]}")

# 4. Print learned weights
print("\nLearned Weights:")
print("W1:", nn.W1)
print("b1:", nn.b1)
print("W2:", nn.W2)
print("b2:", nn.b2)


Epoch 0, Loss: 7.6353
Epoch 100, Loss: 0.7119
Epoch 200, Loss: 0.8889
Epoch 300, Loss: 1.3036
Epoch 400, Loss: 1.7865
Epoch 500, Loss: 2.2702
Epoch 600, Loss: 2.7528
Epoch 700, Loss: 3.3888
Epoch 800, Loss: 3.8277
Epoch 900, Loss: 3.9907

Final Predictions:
Input: [0 0], Predicted: 0, Actual: 0
Input: [0 1], Predicted: 1, Actual: 1
Input: [1 0], Predicted: 1, Actual: 1
Input: [1 1], Predicted: 0, Actual: 0

Learned Weights:
W1: [[-0.81254793  1.45658831  1.94846248  0.69971631]
 [ 0.81288145  0.91976207  0.08364781  0.11697164]]
b1: [[-1.88941989e-05 -9.19767885e-01 -4.63942440e-05  1.17895710e-02]]
W2: [[ 1.10668692]
 [-1.19250664]
 [ 0.73067121]
 [ 0.30411727]]
b2: [[0.4556264]]
