In [34]:
import numpy as np, pandas as pd
from sklearn.preprocessing import LabelEncoder

class DenseLayer:
    def __init__(self, neurons):
        self.neurons = neurons
        
    def relu(self, inputs):
        return np.maximum(0, inputs)

    def softmax(self, inputs):
        exp_scores = np.exp(inputs)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs
    
    def relu_derivative(self, dA, Z):
        dZ = np.array(dA, copy = True)
        dZ[Z <= 0] = 0
        return dZ
    
    def forward(self, inputs, weights, bias, activation):
        Z_curr = np.dot(inputs, weights.T) + bias
        
        if activation == 'relu':
            A_curr = self.relu(inputs=Z_curr)
        elif activation == 'softmax':
            A_curr = self.softmax(inputs=Z_curr)
            
        return A_curr, Z_curr
    
    def backward(self, dA_curr, W_curr, Z_curr, A_prev, activation):
        if activation == 'softmax':
            dW = np.dot(A_prev.T, dA_curr)
            db = np.sum(dA_curr, axis=0, keepdims=True)
            dA = np.dot(dA_curr, W_curr) 
        else:
            dZ = self.relu_derivative(dA_curr, Z_curr)
            dW = np.dot(A_prev.T, dZ)
            db = np.sum(dZ, axis=0, keepdims=True)
            dA = np.dot(dZ, W_curr)
            
        return dA, dW, db

class Network:
    def __init__(self):
        self.network = [] ## layers
        self.architecture = [] ## mapping input neurons --> output neurons
        self.params = [] ## W, b
        self.memory = [] ## Z, A
        self.gradients = [] ## dW, db
        
    def add(self, layer):
        self.network.append(layer)
            
    def _compile(self, data):
        for idx, layer in enumerate(self.network):
            if idx == 0:
                self.architecture.append({'input_dim':data.shape[1], 'output_dim':self.network[idx].neurons,
                                         'activation':'relu'})
            elif idx > 0 and idx < len(self.network)-1:
                self.architecture.append({'input_dim':self.network[idx-1].neurons, 'output_dim':self.network[idx].neurons,
                                         'activation':'relu'})
            else:
                self.architecture.append({'input_dim':self.network[idx-1].neurons, 'output_dim':self.network[idx].neurons,
                                         'activation':'softmax'})
        return self
    
    def _init_weights(self, data):
        self._compile(data)
        
        np.random.seed(99)
        
        for i in range(len(self.architecture)):
            self.params.append({
                'W':np.random.uniform(low=-1, high=1, 
                  size=(self.architecture[i]['output_dim'], 
                        self.architecture[i]['input_dim'])),
                'b':np.zeros((1, self.architecture[i]['output_dim']))})
        
        return self
    
    def _forwardprop(self, data):
        A_curr = data
        
        for i in range(len(self.params)):
            A_prev = A_curr
            A_curr, Z_curr = self.network[i].forward(inputs=A_prev, weights=self.params[i]['W'], 
                                           bias=self.params[i]['b'], activation=self.architecture[i]['activation'])
            
            self.memory.append({'inputs':A_prev, 'Z':Z_curr})
            
        return A_curr
    
    def _backprop(self, predicted, actual):
        num_samples = len(actual)
        
        ## compute the gradient on predictions
        dscores = predicted
        dscores[range(num_samples),actual] -= 1
        dscores /= num_samples
        
        dA_prev = dscores
        
        for idx, layer in reversed(list(enumerate(self.network))):
            dA_curr = dA_prev
            
            A_prev = self.memory[idx]['inputs']
            Z_curr = self.memory[idx]['Z']
            W_curr = self.params[idx]['W']
            
            activation = self.architecture[idx]['activation']

            dA_prev, dW_curr, db_curr = layer.backward(dA_curr, W_curr, Z_curr, A_prev, activation)

            self.gradients.append({'dW':dW_curr, 'db':db_curr})
            
    def _update(self, lr=0.01):
        for idx, layer in enumerate(self.network):
            self.params[idx]['W'] -= lr * list(reversed(self.gradients))[idx]['dW'].T  
            self.params[idx]['b'] -= lr * list(reversed(self.gradients))[idx]['db']
    
    def _get_accuracy(self, predicted, actual):
        return np.mean(np.argmax(predicted, axis=1)==actual)
    
    def _calculate_loss(self, predicted, actual):
        samples = len(actual)
        
        correct_logprobs = -np.log(predicted[range(samples),actual])
        data_loss = np.sum(correct_logprobs)/samples

        return data_loss
    
    def train(self, X_train, y_train, epochs):
        self.loss = []
        self.accuracy = []
        
        self._init_weights(X_train)
        
        for i in range(epochs):
            yhat = self._forwardprop(X_train)
            self.accuracy.append(self._get_accuracy(predicted=yhat, actual=y_train))
            self.loss.append(self._calculate_loss(predicted=yhat, actual=y_train))
            
            self._backprop(predicted=yhat, actual=y_train)
            
            self._update()
            
            if i % 20 == 0:
                s = 'EPOCH: {}, ACCURACY: {}, LOSS: {}'.format(i, self.accuracy[-1], self.loss[-1])
                print(s)

if __name__ == '__main__':
    def get_data(path):
        data = pd.read_csv(path, index_col=0)

        cols = list(data.columns)
        target = cols.pop()

        X = data[cols].copy()
        y = data[target].copy()

        y = LabelEncoder().fit_transform(y)

        return np.array(X), np.array(y)

    X, y = get_data(r'iris.csv')

    model = Network()
    model.add(DenseLayer(6))
    model.add(DenseLayer(8))
    model.add(DenseLayer(10))
    model.add(DenseLayer(3))

    model.train(X_train=X, y_train=y, epochs=200)

EPOCH: 0, ACCURACY: 0.3333333333333333, LOSS: 8.40744717002989
EPOCH: 20, ACCURACY: 0.4, LOSS: 0.9215854842299206
EPOCH: 40, ACCURACY: 0.43333333333333335, LOSS: 0.7536116948198671
EPOCH: 60, ACCURACY: 0.42, LOSS: 0.6714779014672261
EPOCH: 80, ACCURACY: 0.41333333333333333, LOSS: 0.6594143979448671
EPOCH: 100, ACCURACY: 0.6666666666666666, LOSS: 0.5259943503852046
EPOCH: 120, ACCURACY: 0.6666666666666666, LOSS: 0.4706373583820735
EPOCH: 140, ACCURACY: 0.6666666666666666, LOSS: 0.5053203560733265
EPOCH: 160, ACCURACY: 0.48, LOSS: 1.0150613941350848
EPOCH: 180, ACCURACY: 0.8333333333333334, LOSS: 0.46065855297586067


In [20]:
# Define the neural network architecture
input_size = 2
hidden_size1 = 8
hidden_size2 = 8
output_size = 1
learning_rate = 0.01

# Initialize weights and biases
np.random.seed(0)
weights_input_hidden1 = np.random.randn(input_size, hidden_size1)
bias_hidden1 = np.zeros((1, hidden_size1))
weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2)
bias_hidden2 = np.zeros((1, hidden_size2))
weights_hidden2_output = np.random.randn(hidden_size2, output_size)
bias_output = np.zeros((1, output_size))

# Define the sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Define the derivative of the sigmoid function
def sigmoid_derivative(x):
    return x * (1 - x)

# Training loop
num_epochs = 10000
losses = []


In [29]:
hidden1_input = np.dot(X, weights_input_hidden1) + bias_hidden1
hidden1_output = sigmoid(hidden1_input)
print(hidden1_output.shape)

hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + bias_hidden2
hidden2_output = sigmoid(hidden2_input)
print(hidden2_output.shape)

output = np.dot(hidden2_output, weights_hidden2_output) + bias_output
output_prob = sigmoid(output)
print(output_prob.shape)

loss = -np.mean(y * np.log(output_prob) + (1 - y) * np.log(1 - output_prob)) # cross entropy loss
losses.append(loss)

print(loss)

d_output = output_prob - y
d_hidden2 = d_output.dot(weights_hidden2_output.T) * sigmoid_derivative(hidden2_output)
d_hidden1 = d_hidden2.dot(weights_hidden1_hidden2.T) * sigmoid_derivative(hidden1_output)


(100, 8)
(100, 8)
(100, 1)
0.7221645680206485


ValueError: shapes (100,100) and (1,8) not aligned: 100 (dim 1) != 1 (dim 0)

In [None]:

for epoch in range(num_epochs):
    # Forward propagation
    hidden1_input = np.dot(X, weights_input_hidden1) + bias_hidden1
    hidden1_output = sigmoid(hidden1_input)
    
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + bias_hidden2
    hidden2_output = sigmoid(hidden2_input)
    
    output = np.dot(hidden2_output, weights_hidden2_output) + bias_output
    output_prob = sigmoid(output)

    # Calculate the loss (binary cross-entropy)
    loss = -np.mean(y * np.log(output_prob) + (1 - y) * np.log(1 - output_prob))
    losses.append(loss)

    # Backpropagation
    d_output = output_prob - y
    d_hidden2 = d_output.dot(weights_hidden2_output.T) * sigmoid_derivative(hidden2_output)
    d_hidden1 = d_hidden2.dot(weights_hidden1_hidden2.T) * sigmoid_derivative(hidden1_output)

    weights_hidden2_output -= hidden2_output.T.dot(d_output) * learning_rate
    bias_output -= np.sum(d_output, axis=0, keepdims=True) * learning_rate
    weights_hidden1_hidden2 -= hidden1_output.T.dot(d_hidden2) * learning_rate
    bias_hidden2 -= np.sum(d_hidden2, axis=0, keepdims=True) * learning_rate
    weights_input_hidden1 -= X.T.dot(d_hidden1) * learning_rate
    bias_hidden1 -= np.sum(d_hidden1, axis=0, keepdims=True) * learning_rate

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}: Loss = {loss}")

# Plot the loss curve
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.show()

# Generate some test data for predictions
X_test = np.array([[1.2, 1.3], [-1.2, -1.3]])

# Forward propagation for test data
hidden1_input_test = np.dot(X_test, weights_input_hidden1) + bias_hidden1
hidden1_output_test = sigmoid(hidden1_input_test)

hidden2_input_test = np.dot(hidden1_output_test, weights_hidden1_hidden2) + bias_hidden2
hidden2_output_test = sigmoid(hidden2_input_test)

output_test = np.dot(hidden2_output_test, weights_hidden2_output) + bias_output
output_prob_test = sigmoid(output_test)

print("Predictions for test data:")
print(output_prob_test)


In [32]:
import numpy as np
import matplotlib.pyplot as plt

# Generate some random data for training
np.random.seed(0)
X = np.random.randn(100, 2)
y = (X[:, 0] * X[:, 1] > 0).astype(int)

# Define the neural network architecture
input_size = 2
hidden_size1 = 8
hidden_size2 = 8
output_size = 1
learning_rate = 0.01

# Initialize weights and biases
np.random.seed(0)
weights_input_hidden1 = np.random.randn(input_size, hidden_size1)
bias_hidden1 = np.zeros((1, hidden_size1))
weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2)
bias_hidden2 = np.zeros((1, hidden_size2))
weights_hidden2_output = np.random.randn(hidden_size2, output_size)
bias_output = np.zeros((1, output_size))

# Define the sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Define the derivative of the sigmoid function
def sigmoid_derivative(x):
    return x * (1 - x)

# Training loop
num_epochs = 10000
losses = []

for epoch in range(num_epochs):
    # Forward propagation
    hidden1_input = np.dot(X, weights_input_hidden1) + bias_hidden1
    hidden1_output = sigmoid(hidden1_input)
    
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + bias_hidden2
    hidden2_output = sigmoid(hidden2_input)
    
    output = np.dot(hidden2_output, weights_hidden2_output) + bias_output
    output_prob = sigmoid(output)

    # Calculate the loss (binary cross-entropy)
    loss = -np.mean(y * np.log(output_prob) + (1 - y) * np.log(1 - output_prob))
    losses.append(loss)

    # Backpropagation
    d_output = output_prob - y
    d_hidden2 = d_output.dot(weights_hidden2_output.T) * sigmoid_derivative(hidden2_output)
    d_hidden1 = d_hidden2.dot(weights_hidden1_hidden2.T) * sigmoid_derivative(hidden1_output)

    weights_hidden2_output -= hidden2_output.T.dot(d_output) * learning_rate
    bias_output -= np.sum(d_output, axis=0, keepdims=True) * learning_rate
    weights_hidden1_hidden2 -= hidden1_output.T.dot(d_hidden2) * learning_rate
    bias_hidden2 -= np.sum(d_hidden2, axis=0, keepdims=True) * learning_rate
    weights_input_hidden1 -= X.T.dot(d_hidden1) * learning_rate
    bias_hidden1 -= np.sum(d_hidden1, axis=0, keepdims=True) * learning_rate

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}: Loss = {loss}")

# Plot the loss curve
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.show()

# Generate some test data for predictions
X_test = np.array([[1.2, 1.3], [-1.2, -1.3]])

# Forward propagation for test data
hidden1_input_test = np.dot(X_test, weights_input_hidden1) + bias_hidden1
hidden1_output_test = sigmoid(hidden1_input_test)

hidden2_input_test = np.dot(hidden1_output_test, weights_hidden1_hidden2) + bias_hidden2
hidden2_output_test = sigmoid(hidden2_input_test)

output_test = np.dot(hidden2_output_test, weights_hidden2_output) + bias_output
output_prob_test = sigmoid(output_test)

print("Predictions for test data:")
print(output_prob_test)


ValueError: shapes (100,100) and (1,8) not aligned: 100 (dim 1) != 1 (dim 0)