In [2]:
import numpy as np

In [37]:
class neural_network:
    # TODO: ADD A WAY TO USE DIFFERENT HIDDEN LAYERS ACTIVATION FUNCTION AND LAST LAYER ACTIVATION FUNCIONS (GENERALLY: HIDDEN ARE RELU OR TANH)

    def __init__(self, architecture:list=[2,3,3,1], activation_function:str="sigmoid", learning_rate=0.01):
        self.architecture = architecture
        self.cache = dict()
        self.learning_rate = learning_rate
        
        # weights initialized using random numbers from normal distribution
        self.weights = [
            np.random.randn(architecture[x+1], architecture[x])
            for x in range(len(architecture)-1)
            ]
        
        # biases initialized using zeros for each neuron
        self.biases = [
            np.zeros((architecture[x+1], 1))
            for x in range(len(architecture)-1)
        ]
        
        # activation functions supported
        activation_functions_available = {
            "relu": lambda x: np.maximum(0, x),
            "tanh": np.tanh,
            "sigmoid": lambda x: ((1) / (1 + np.exp(-x)))
        }

        # validation of the activation function
        if activation_function.lower() in ["relu", "tanh", "sigmoid"]:
            self.activation_function = activation_functions_available[activation_function.lower()]
        else: 
            raise NameError("Activation Function not supported")

        derivate_activation_function = {
            # CHANGE THIS TO BE MORE SPECIFIC AND BETTER - PRINCIPALLY TANH
            "relu": lambda Z: (Z > 0).astype(float),
            "tanh": lambda Z: 1 - self.activation_function(Z)**2,
            "sigmoid": lambda Z: self.activation_function(Z) * (1 - self.activation_function(Z))
        }
    
        self.derivate_activation_function = derivate_activation_function[activation_function.lower()]

        
    def input_data(self, data, y):
        # TODO validation
        self.data = data
        self.y = y

    def _forward_propagation(self):
        """
        Basic formula:
        Z[l] = W[l] A[l-1] + b[l]
        A[l] = g(Z[l])
        
        Where:
            - l: Current Layer
            - W: Weights
            - A: Activation Vector
            - b: Biases
            - g: Activation Function
        """
        # dict to save the values of each layer
        A = self.data
        self.cache["A0"] = A
        for layer_idx in range(len(self.architecture)-1):
            Z = self.weights[layer_idx] @ A + self.biases[layer_idx]
            A = self.activation_function(Z)
            self.cache[f"A{layer_idx + 1}"] = A
            self.cache[f"Z{layer_idx + 1}"] = Z
        y_hat = A
        return y_hat
    
    def _calculate_loss(self, y_hat):
        """
        Use of cross entropy to calculate the loss

        For a single example:
            - L(y_hat, y) = -(y * log y_hat + (1 - y) * log(1 - y_hat))

        For all training samples:
            - C = (1 / m) * sum(L(y_hat, y))
        """
        
        # original data and prediction
        y = self.y
        # y_hat = self._forward_propagation()

        # loss calculation based on the matrices
        prediction_losses = -((y * np.log(y_hat)) + (1 - y) * np.log(1 - y_hat))

        # num of entities extracted to get the global loss
        y_total = y_hat.reshape(-1).shape[0]

        # global loss
        losses_sum = (1 / y_total) * np.sum(prediction_losses, axis=1)

        return np.sum(losses_sum)

    def _backpropagation(self, y_hat, y_real, m):
        """ULTIMO LAYER: DERIVADA DA FUNCAO DE ATIVAÇÃO ISOLADA"""
        gradient_W = [None] * len(self.weights)
        gradient_b = [None] * len(self.weights)

        # last layer dZ
        # only one that uses explicitly the derivative dC/dZ
        # predicted value - real value * scalar factor (1/m)
        # TODO: THIS ONE ISALL  BOUT THE SIGMOID FUNCTION, ADAPT IT TO RECEIVE THE RELU DERIVATIVE + TANH DERIVATIVE
        dZ = (1/m) * (y_hat - y_real)

        for layer_idx in reversed(range(len(self.weights))):
            
            # correto (dW = dZ * A^t[l-1])
            W = self.cache[f"A{layer_idx}"]
            dW = dZ @ W.T
            
            #correto (db = sum(dZ))
            db = np.sum(dZ, axis=1, keepdims=True)

            # saving the weights and biases gradients
            gradient_W[layer_idx] = dW
            gradient_b[layer_idx] = db

            if layer_idx > 0:
                # correto - derivada da camada anterior (w^t * dz)
                dA_back = self.weights[layer_idx]
                dA_back = dA_back.T @ dZ

                dZ = dA_back * self.derivate_activation_function(self.cache[f"Z{layer_idx}"])

        for layer_idx in range(len(self.weights)):
            # weights and biases adaptation using gradient descent
            # theta = theta - learning rate * slope     (derivative)
            self.weights[layer_idx] -= self.learning_rate * gradient_W[layer_idx]
            self.biases[layer_idx] -= self.learning_rate * gradient_b[layer_idx]

In [39]:
nn = neural_network()

def prepare_data():
  X = np.array([
      [150, 70],
      [254, 73],
      [312, 68],
      [120, 60],
      [154, 61],
      [212, 65],
      [216, 67],
      [145, 67],
      [184, 64],
      [130, 69]
  ])
  y = np.array([0,1,1,0,0,1,1,0,1,0])
  m = 10
  A0 = X.T
  Y = y.reshape(1, m)

  return A0, Y, m

A0, Y, m = prepare_data()

nn.input_data(A0, Y)

# print(nn.weights)
# print(nn.biases)

# y_hat = nn._forward_propagation()
# loss1 = nn._calculate_loss(y_hat)

# nn._backpropagation(y_hat, Y, m)

# y_hat2 = nn._forward_propagation()
# loss2 = nn._calculate_loss(y_hat2)

# print(nn.weights)
# print(nn.biases)


# print(loss1, loss2)

for i in range(100000):
  y_hat = nn._forward_propagation()
  print(nn._calculate_loss(y_hat))
  nn._backpropagation(y_hat, Y, m)
# print(loss)

0.9432649362688794
0.9405609202705394
0.9378808186167389
0.9352245220940208
0.9325919201722805
0.9299829010350085
0.9273973516097265
0.9248351575985924
0.9222962035091602
0.9197803726852737
0.9172875473380796
0.9148176085771397
0.9123704364416261
0.9099459099315823
0.9075439070392323
0.9051643047803215
0.9028069792254734
0.9004718055315433
0.8981586579729561
0.8958674099730097
0.8935979341351292
0.8913501022740582
0.8891237854469684
0.8869188539844761
0.8847351775215501
0.8825726250282973
0.8804310648406075
0.8783103646906542
0.8762103917372266
0.874131012595888
0.8720720933689434
0.8700334996752082
0.8680150966795624
0.8660167491222803
0.8640383213481263
0.8620796773352037
0.8601406807235457
0.8582211948434418
0.8563210827434878
0.8544402072183495
0.8525784308362319
0.8507356159660475
0.8489116248042715
0.8471063194014793
0.845319561688558
0.8435512135025853
0.8418011366123663
0.8400691927436258
0.8383552436038505
0.8366591509067693
0.8349807763964736
0.833319981871169
0.8316766292065

In [None]:
import numpy as np

# 1. create network architecture
L = 3
n = [2, 3, 3, 1]

# 2. create weights and biases
W1 = np.random.randn(n[1], n[0])
W2 = np.random.randn(n[2], n[1])
W3 = np.random.randn(n[3], n[2])
b1 = np.random.randn(n[1], 1)
b2 = np.random.randn(n[2], 1)
b3 = np.random.randn(n[3], 1)

# 3. create training data and labels
def prepare_data():
  X = np.array([
      [150, 70],
      [254, 73],
      [312, 68],
      [120, 60],
      [154, 61],
      [212, 65],
      [216, 67],
      [145, 67],
      [184, 64],
      [130, 69]
  ])
  y = np.array([0,1,1,0,0,1,1,0,1,0])
  m = 10
  A0 = X.T
  Y = y.reshape(n[L], m)

  return A0, Y

# 4. create activation function
def sigmoid(arr):
  return 1 / (1 + np.exp(-1 * arr))

# 5. create feed forward process
def feed_forward(A0):

  # layer 1 calculations
  Z1 = W1 @ A0 + b1
  A1 = sigmoid(Z1)

  # layer 2 calculations
  Z2 = W2 @ A1 + b2
  A2 = sigmoid(Z2)

  # layer 3 calculations
  Z3 = W3 @ A2 + b3
  A3 = sigmoid(Z3)
  
  y_hat = A3
  return y_hat

A0, Y = prepare_data()
y_hat = feed_forward(A0)
y_hat

array([[0.90754329, 0.91390799, 0.91390799, 0.88710582, 0.91376846,
        0.91390797, 0.91390797, 0.90849836, 0.913906  , 0.86624987]])

In [None]:
class neuron:
    # SET CONNECTION HERE? MAY ITERATE EACH NEURON FOR EACH ONE OF THE NEXT LAYER WITH RANDOM UNIFORM NUMBERS...
    # IF NEURON1 == NONE -> INPUT NEURONS, IF NEURON2 == NONE -> OUTPUT NEURON
    # BIAS ADDITION GOES AFTER WEIGHT MULTIPLICATION
    # act_func*w + b
    # TO MAKE USE OF MATRICES FOR THE CALCULATIONS
    @staticmethod
    def _relu(x:float):
        return (max(0, x))

    @staticmethod
    def _tanh(x:float):
        return ((np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x)))

    @staticmethod
    def _sigmoid(x):
        return ((1) / (1 + np.exp(-x)))
    
    def __init__(self, neuron_bias:float=None, activation_function:str="sigmoid"):
        self.bias = np.random.randn() if neuron_bias is None else neuron_bias
        self.activation_functions = {
            "relu": self._relu,
            "tanh": self._tanh,
            "sigmoid": self._sigmoid
        }
        if activation_function.lower() in ["relu", "tanh", "sigmoid"]:
            self.activation_function = self.activation_functions[activation_function.lower()]
        else:
            raise NameError ("Função de ativação indisponível")
        self.output = None

    def activate(self, inputs, weights, bias):
        weighted_inputs_with_bias = np.dot(inputs, weights) + bias
        self.output = self.activation_function(weighted_inputs_with_bias)
        return self.output