In [1]:
import numpy as np

# Fonctions d'activation
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Derivative of ReLU with respect to its input.

    Returns 1.0 where ``x > 0`` and 0.0 elsewhere (the derivative at exactly
    zero is taken to be 0). ``x`` may be an array, a nested list or a plain
    scalar; it is coerced with ``np.asarray`` so that every input kind
    accepted by ``np.maximum`` in the forward ReLU is accepted here as well.
    """
    # A bare Python comparison yields a ``bool`` (no ``.astype``) for scalars
    # and raises for lists, hence the explicit array conversion first.
    return (np.asarray(x) > 0).astype(float)

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook formula computes ``exp(-x)``, which overflows to ``inf``
    (and emits a RuntimeWarning) for large negative ``x``. This version only
    ever exponentiates a non-positive number, so it is safe for any finite
    input while returning the same values.

    ``x`` may be a scalar or an array; the result has the same shape.
    """
    z = np.asarray(x)
    # exp(-|z|) always lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + exp(-z)).  For z < 0: exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple converts a 0-d result back to a NumPy
    # scalar (what the textbook formula returns for scalar input) and is a
    # plain view for arrays of one or more dimensions.
    return out[()]

def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement





In [2]:
# Classe Layer représentant une couche d'un réseau de neurones
class Layer:
    """Fully connected layer computing ``activation(X @ W + b)``.

    Parameters
    ----------
    input_size : int
        Number of input features (rows of ``W``).
    output_size : int
        Number of units in the layer (columns of ``W``, width of ``b``).
    activation : {'relu', 'sigmoid'}
        Name of the element-wise activation applied after the affine map.
    seed : int or None
        Seed for the weight initialisation.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.

    Notes
    -----
    Weights are drawn from a private ``np.random.RandomState(seed)``. This
    produces exactly the same values as ``np.random.seed(seed)`` followed by
    ``np.random.randn(...)``, but leaves NumPy's global random state
    untouched, so building a layer does not disturb randomness used elsewhere.
    """

    def __init__(self, input_size, output_size, activation='relu', seed=42):
        # Resolve the activation first so an invalid name fails before any
        # parameters are allocated.
        if activation == 'relu':
            self.activation = relu
            self.activation_derivative = relu_derivative
        elif activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_derivative = sigmoid_derivative
        else:
            raise ValueError("Activation non supportée : utilisez 'relu' ou 'sigmoid'.")
        self.activation_name = activation

        # Standard-normal weights of shape (input_size, output_size); zero bias row.
        rng = np.random.RandomState(seed)
        self.W = rng.randn(input_size, output_size)
        self.b = np.zeros((1, output_size))

        # Values cached by forward() for use in backward().
        self.input = None
        self.Z = None
        self.output = None
        # Gradients from the most recent backward() call.
        self.dW = None
        self.db = None

    def forward(self, X):
        """Compute the layer output for ``X`` and cache what backward() needs."""
        self.input = np.asarray(X)  # backward() needs ``.T``, so store an ndarray
        self.Z = np.dot(self.input, self.W) + self.b
        self.output = self.activation(self.Z)
        return self.output

    def backward(self, dA, learning_rate):
        """Back-propagate ``dA`` and apply one gradient-descent step in place.

        Parameters
        ----------
        dA : ndarray
            Gradient of the loss with respect to this layer's output.
        learning_rate : float
            Step size applied to ``W`` and ``b``.

        Returns
        -------
        ndarray
            Gradient with respect to this layer's input, computed with the
            weights as they were *before* this call's update.

        Raises
        ------
        RuntimeError
            If called before any forward() pass.
        """
        if self.input is None or self.Z is None:
            raise RuntimeError("backward() appelé avant forward().")

        dZ = dA * self.activation_derivative(self.Z)
        self.dW = np.dot(self.input.T, dZ)
        self.db = np.sum(dZ, axis=0, keepdims=True)
        # Must be evaluated before W is modified below.
        dA_prev = np.dot(dZ, self.W.T)

        # Parameter update
        self.W -= learning_rate * self.dW
        self.b -= learning_rate * self.db

        return dA_prev


In [3]:
class Model:
    """Sequential stack of layers trained by full-batch gradient descent.

    Layers are applied in the order they were added. Each layer must provide
    ``forward(X)`` and ``backward(dA, learning_rate)``; the latter is expected
    to update the layer's own parameters and return the gradient for the
    preceding layer.

    Attributes
    ----------
    layers : list
        Layers in forward order.
    learning_rate : float
        Step size handed to every layer during backward().
    loss_history : list
        Loss of every epoch of the most recent train() call.
    """

    def __init__(self, learning_rate=0.01):
        self.layers = []
        self.learning_rate = learning_rate
        self.loss_history = []

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, X):
        """Run ``X`` through every layer in order and return the final output."""
        for layer in self.layers:
            X = layer.forward(X)
        return X

    @staticmethod
    def _match_targets(y_true, y_pred):
        """Return ``(y_true, y_pred)`` as arrays with ``y_true`` shaped like ``y_pred``.

        When both hold the same number of elements but differ in shape (for
        example ``(m,)`` labels against ``(m, 1)`` predictions), a plain
        subtraction would broadcast to an ``(m, m)`` matrix. Reshaping the
        targets prevents that. Any other combination is left to NumPy's
        normal broadcasting rules.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
            y_true = y_true.reshape(y_pred.shape)
        return y_true, y_pred

    def backward(self, y_true, y_pred):
        """Back-propagate the squared-error gradient through all layers.

        The starting gradient is ``y_pred - y_true``. It is deliberately not
        divided by the number of samples, whereas loss() reports a mean, so
        the effective step grows with the batch size.
        """
        y_true, y_pred = self._match_targets(y_true, y_pred)
        dA = y_pred - y_true

        for layer in reversed(self.layers):
            dA = layer.backward(dA, self.learning_rate)

    def loss(self, y_true, y_pred):
        """Return half the mean squared error between targets and predictions."""
        y_true, y_pred = self._match_targets(y_true, y_pred)
        loss = np.mean((y_true - y_pred) ** 2)
        return loss / 2

    def train(self, X, y, epochs=1000, log_every=100):
        """Run ``epochs`` full-batch gradient-descent steps on ``(X, y)``.

        Parameters
        ----------
        X, y : array-like
            Training inputs and targets.
        epochs : int
            Number of forward/backward passes.
        log_every : int
            Print the loss every ``log_every`` epochs (starting with epoch 0).
            Pass 0 or None to train silently.

        The loss of every epoch, measured before that epoch's update, is
        stored in ``self.loss_history``.
        """
        self.loss_history = []
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = self.loss(y, y_pred)
            self.loss_history.append(loss)

            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

            self.backward(y, y_pred)

    def predict(self, X, threshold=0.5):
        """Return binary predictions: 1 where the model output exceeds ``threshold``."""
        y_pred = self.forward(X)
        return (y_pred > threshold).astype(int)


In [4]:
if __name__ == "__main__":
    # XOR truth table: the four binary input pairs and their expected outputs.
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    # 2 -> 3 -> 1 network: ReLU hidden layer followed by a sigmoid output unit.
    # The layers are built in forward order, each with its own seed.
    model = Model(learning_rate=0.1)
    hidden_layer = Layer(input_size=2, output_size=3, activation='relu', seed=1)
    model.add(hidden_layer)
    output_layer = Layer(input_size=3, output_size=1, activation='sigmoid', seed=2)
    model.add(output_layer)

    # Training
    model.train(X, y, epochs=1000)

    # Compare the thresholded predictions with the targets, one row per sample.
    predictions = model.predict(X)
    print("\nPrédictions sur XOR :")
    for sample, predicted, expected in zip(X, predictions, y):
        print(f"Entrée : {sample}, Prédiction : {predicted[0]}, Attendu : {expected[0]}")


Epoch 0, Loss: 0.1431
Epoch 100, Loss: 0.0833
Epoch 200, Loss: 0.0359
Epoch 300, Loss: 0.0169
Epoch 400, Loss: 0.0098
Epoch 500, Loss: 0.0066
Epoch 600, Loss: 0.0048
Epoch 700, Loss: 0.0037
Epoch 800, Loss: 0.0030
Epoch 900, Loss: 0.0025

Prédictions sur XOR :
Entrée : [0 0], Prédiction : 0, Attendu : 0
Entrée : [0 1], Prédiction : 1, Attendu : 1
Entrée : [1 0], Prédiction : 1, Attendu : 1
Entrée : [1 1], Prédiction : 0, Attendu : 0
