In [1]:
import numpy as np
import csv

In [17]:
class Layer:
    def __init__(self, i, input_size, output_size):
        self.input_size, self.output_size = input_size, output_size
        self.i = i
        self.output, self.grad = self.initialize_output_param()
        
    def initialize_output_param(self):
        # Xavier initialization for weights
        weight_variance = 2.0 / (self.input_size + self.output_size)
        initialized_weights = np.random.normal(0, np.sqrt(weight_variance), (self.input_size, self.output_size))
        initialized_weights = np.abs(initialized_weights)  # Ensure positive weights

        # Initialized parameters
        output = {
            "x": None,
            "A": np.random.randn(self.input_size, self.output_size),
            "b": np.random.randn(self.output_size),
            "u": None,
            "v": None,
            "z": None,
            "L": None
        }
        
        grad = {
            "dL/dz": None,
            "du/dz": None,
            "dz/dv": None,
            "dv/du": None,
            "dv/db": None,
            "du/dx": None,
            "du/dA": None,
            "dL/dA": None,
            "dL/db": None
        }
        
        return output, grad
    
    def ReLu(self, x, direction, max_layer = None, a = 0.01):
        if direction == "forward":
            return np.maximum(a*x, x) # Applying ReLU element-wise
        
        elif direction == "backward":
                if max_layer == True:
                    return np.where(x > 0, 1, 0)
                elif max_layer == False:
                    return np.maximum(a*x, x)
            
    def sigmoid(self, x, direction, max_layer = None, a_L = 0.3):
        if direction == "forward":
            return 1 / (1 + np.exp(-a_L * x)) # Applying ReLU element-wise
        
        elif direction == "backward":
                if max_layer == True:
                    return a_L * self.grad["dL/dz"] * x * (1 - x)
                elif max_layer == False:
                    return None

                
    def forward_prop(self, x, layer = None):
        self.output["x"] = x
        self.output["u"] = np.matmul(x.T, self.output["A"])
        self.output["v"] = self.output["b"] + self.output["u"]
        
        if layer == "output":
            self.output["z"] = self.sigmoid(self.output["v"], "forward")
            self.output["L"] = self.output["z"]
        else:
            self.output["z"] = self.ReLu(self.output["v"], "forward")

        return self.output["z"].T
    
    def backward_prop(self, layer, max_layer, y = None, dudz = None, A = None):
        if layer == max_layer:
            self.grad["dL/dz"] = (self.output["L"] - y.T)
            self.grad["dz/dv"] = self.sigmoid(self.output["L"], "backward", True).T
            self.grad["dv/db"] = self.grad["dz/dv"].reshape(-1,1)*self.output["z"].reshape(-1,1)
            
        else:
            self.grad["du/dz"] = dudz
            self.grad["dz/dv"] = self.ReLu(self.grad["du/dz"], "backward", False)
            self.grad["dv/db"] = self.grad["dz/dv"].reshape(-1,1)*self.output["z"].reshape(-1,1)

        
        #print(self.grad["dv/db"])   
        self.grad["dv/du"] = self.grad["dz/dv"].reshape(-1,1)*self.output["z"].reshape(-1,1)
        self.grad["du/dA"] = (self.grad["dv/du"] * self.output["x"].T).T
        self.grad["du/dx"] = np.dot(self.output["A"], self.grad["dv/du"].reshape(-1, 1))
        return self.grad["du/dx"], self.grad["du/dA"]
    
    def update_weights_bias(self, layer, max_layer, mult_grad, learning_rate = 0.003):
        if layer == max_layer:
            self.output["A"] -= learning_rate * (self.grad["du/dA"])
            self.output["b"] -= learning_rate * (self.grad["dv/db"]).T.flatten()
            self.grad["dL/dA"] = (self.grad["du/dA"])
            self.grad["dL/db"] = (self.grad["du/dA"])
            return self.grad["du/dx"]
        else:
            #print(self.grad["dv/db"].shape)
            #print(mult_grad.shape)
            self.output["A"] -= learning_rate * ((mult_grad * self.grad["du/dA"].T).T)
            self.output["b"] -= learning_rate * ((mult_grad * self.grad["dv/db"])).T.flatten()
            self.grad["dL/dA"] = ((mult_grad * self.grad["du/dA"].T).T)
            self.grad["dL/db"] = ((mult_grad * self.grad["dv/db"])).T.flatten()
            return self.grad["du/dx"]
    
    
class Neural_Network:
    def __init__(self):
        self.layers = []    
    
    def add_layer(self, layer):
        self.layers.append(layer)
        
    def forward_pass(self, x):
        i = 1
        max = len(self.layers)
        # Perform a forward pass for all the layers
        for layer in self.layers:
            if i == max:
                x = layer.forward_prop(x, "output")
            else:
                x = layer.forward_prop(x)
            #print(f"\n\033[1mOutput for layer {i}: \033[0m")
            #print(x)
            i += 1
        return x
    
    def backward_pass(self, y, grad = None, grad_mult = None):
        
        i = len(self.layers) + 1
        #Perform a backward pass for all the layers
        for layer in reversed(self.layers):
            grad, grad_2 = layer.backward_prop(i, len(self.layers) + 1, y, grad)
            grad_mult = layer.update_weights_bias(i, len(self.layers) + 1, grad_mult, learning_rate = 1.0)
            #print(f"\n\033[1mGradient for layer {i-1}: \033[0m")
            #print(grad_2)
            i -= 1
        return None

def print_results(network):
    i = 0
    print("\n\nHere is the cache and parameters after the last epoch: ")
    for layer in network.layers:
        print("\n\n\033[1mFor layer \033[0m", i+1, " :")
        print("")
        
        if hasattr(layer, 'grad') and isinstance(layer.grad, dict):
            print("Outputs:")
            if not layer.output: 
                print("No outputs")
            else:
                for key, value in layer.output.items():
                    print(f"\033[1m{key}:\033[0m")
                    print(value) 
                    
            print("\nGradients:")
            if not layer.grad:  
                print("No gradients")
            else:
                for key, value in layer.grad.items():
                    print(f"\033[1m{key}:\033[0m")
                    print(value) 
            print()
        else:
            print(f"Layer: {layer.__class__.__name__} has no or invalid 'grad' attribute.")
        i += 1
    return None
        
    
def Train_Computational_Graph_NN(data_point, label, layer_num, epochs = 55, tol = 1e-5, mini_batch_size = 10):
    x_size = data_point.shape[1]
    num_batches = len(data_point) // mini_batch_size
    prev_avg_error = 9999

    # Building the neural network
    network = Neural_Network()
    for i in range(layer_num):
        if i == 0:
            output_size = int(input(f"Enter output size for layer {i + 1}: "))
            network.add_layer(Layer(i, input_size=x_size, output_size=output_size))
        else:
            input_size = output_size
            output_size = int(input(f"Enter output size for layer {i + 1}: "))
            network.add_layer(Layer(i, input_size=input_size, output_size=output_size))

    print("\nTraining the dataset...")
    for epoch in range(epochs):
        shuffled_indices = np.random.permutation(len(data_point))
        shuffled_data = data_point[shuffled_indices]
        shuffled_labels = label[shuffled_indices]
        avg_sum_error = 0 # Compute the average squared error for the epoch
        for batch in range(num_batches):
            start_idx = batch * mini_batch_size
            end_idx = start_idx + mini_batch_size
            mini_batch_data = shuffled_data[start_idx:end_idx]
            mini_batch_labels = shuffled_labels[start_idx:end_idx]

            for x, y in zip(mini_batch_data, mini_batch_labels):
                desired_output = int(y)
                one_hot_encoded = np.zeros(3).T  # Create an array of zeros of length num_classes
                one_hot_encoded[desired_output - 1] = 1
                #print(one_hot_encoded)
                x_input = np.array(x).T
                y_output = network.forward_pass(x_input) # Here is the output of A3 from the self.forward_propagation
                #print("output:")
                #print(y_output)
                loss = one_hot_encoded - y_output
                #print("loss")
                #print(loss)
                sse_error = (1/2)*np.sum(loss ** 2) # Compute sum of squares error
                avg_sum_error += sse_error # Summation of the sum of squares error per epoch
                network.backward_pass(one_hot_encoded) # We do back propagation using the output
                #print_results(network)
        
        # Compute Average Error per epoch
        avg_error = (1/len(shuffled_data))*avg_sum_error

        #print average error per epoch
        print(f"Epoch {epoch + 1}: ", "average error: ", avg_error)


        # Check if the change in error is smaller than the tolerance (tol)
        if abs(prev_avg_error - avg_error) < tol:
            print(f"Converged. Stopping training for epoch {epoch}, average error: ", avg_error)
            break

        prev_avg_error = avg_error  # Update previous error for the next iteration 

    print_results(network)
    return network

def import_data_set(File_dir):
    # Initialize labels and features to store data
    labels = []
    features = []

    # Read the CSV file
    with open(File_dir, newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            labels.append(float(row[0])) 
            features.append([float(x) for x in row[1:]]) 

    # Convert lists to NumPy arrays
    features = np.array(features)
    labels = np.array(labels)

    # Shuffle the dataset
    np.random.seed(42)  # For reproducibility, set seed if desired
    shuffled_indices = np.random.permutation(len(labels))
    features = features[shuffled_indices]
    labels = labels[shuffled_indices]

    # Get unique class labels and their counts
    unique_labels, counts = np.unique(labels, return_counts=True)

    # Calculate the number of samples for each class in the training set
    train_samples_per_class = int(0.8 * len(labels) / len(unique_labels))

    # Initialize arrays to store indices for training and test sets
    train_indices = []
    test_indices = []

    # Split the indices for each class into training and test sets
    for lbl in unique_labels:
        indices = np.where(labels == lbl)[0]
        np.random.shuffle(indices)
        train_indices.extend(indices[:train_samples_per_class])
        test_indices.extend(indices[train_samples_per_class:])

    # Convert indices to NumPy arrays
    train_indices = np.array(train_indices)
    test_indices = np.array(test_indices)

    # Split the dataset into training and test sets based on indices
    train_data = features[train_indices]
    train_labels = labels[train_indices]

    test_data = features[test_indices]
    test_labels = labels[test_indices]

    return train_data, train_labels, test_data, test_labels



File_dir = r'C:\Users\Jeryl Salas\OneDrive\Documents\AI 211 Computational Linear Algebra\Stochastic Gradient Descent\Iris_Dataset.csv'  
layer_num = int(input("How many layers do you want in your neural network: "))
tr_d, tr_l, ts_d, ts_l = import_data_set(File_dir)
network = Train_Computational_Graph_NN(tr_d, tr_l, layer_num)





Training the dataset...
Epoch 1:  average error:  0.39783686164282217
Epoch 2:  average error:  0.35793975094456276
Epoch 3:  average error:  0.3472158132816834
Epoch 4:  average error:  0.34122691948359907
Epoch 5:  average error:  0.33862662071397737
Epoch 6:  average error:  0.3366901032672609
Epoch 7:  average error:  0.33590798209444184
Epoch 8:  average error:  0.33512260032771424
Epoch 9:  average error:  0.3347334538434814
Epoch 10:  average error:  0.3345647094322917
Epoch 11:  average error:  0.33436609320639915
Epoch 12:  average error:  0.3342359870398876
Epoch 13:  average error:  0.3341945043567499
Epoch 14:  average error:  0.334162476795611
Epoch 15:  average error:  0.3341516812508895
Epoch 16:  average error:  0.33408061206897266
Epoch 17:  average error:  0.33410187099740063
Epoch 18:  average error:  0.33407017483541956
Epoch 19:  average error:  0.33406307514509703
Converged. Stopping training for epoch 18, average error:  0.33406307514509703


Here is the cache a