# In [None]:
import numpy as np


class NeuralNet:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    Every layer uses the sigmoid activation.  Samples are processed one at a
    time as column vectors of shape ``(n, 1)``; 1-D samples are promoted to
    that shape automatically.

    Attributes are kept as 1-D ``dtype=object`` NumPy arrays (``weights`` and
    ``biases``) holding one float matrix per connection between consecutive
    layers.
    """

    def __init__(self, f_layer_c, hidden_layers_neuron_c, last_layer_c):
        """Create the network and randomly initialise its parameters.

        Args:
            f_layer_c: Number of input neurons.
            hidden_layers_neuron_c: Sequence with the neuron count of each
                hidden layer (may be empty).
            last_layer_c: Number of output neurons.
        """
        self.f_layer_c = f_layer_c
        self.hidden_layers_c = len(hidden_layers_neuron_c)
        self.hidden_layers_neuron_c = hidden_layers_neuron_c
        self.last_layer_c = last_layer_c

        self.total_layer_count = 2 + self.hidden_layers_c

        self.act_function = self.sigmoid
        self.act_derivative = self.derivative_sigmoid

        self.weights = None
        self.biases = None

        self.__init_weights_biases()

    def __init_weights_biases(self):
        """Allocate zero biases and He-scaled random weights for every layer pair."""
        # list(...) lets callers pass a tuple (or any sequence) of hidden sizes.
        layers_counts = [self.f_layer_c] + list(self.hidden_layers_neuron_c) + [self.last_layer_c]

        weights = []
        biases = []
        for fan_in, fan_out in zip(layers_counts[:-1], layers_counts[1:]):
            biases.append(np.zeros((fan_out, 1)))
            weights.append(np.random.randn(fan_out, fan_in) * np.sqrt(2 / fan_in))

        self.weights = self._as_object_array(weights)
        self.biases = self._as_object_array(biases)

    @staticmethod
    def _as_object_array(arrays):
        """Pack ``arrays`` into a 1-D object array, one matrix per element.

        ``np.array(arrays, dtype=object)`` is not used on purpose: when the
        matrices share a shape it builds an N-D object array of Python floats
        (which ``np.exp`` cannot process), and when only their first dimension
        matches it raises a broadcasting ``ValueError``.
        """
        packed = np.empty(len(arrays), dtype=object)
        for index, array in enumerate(arrays):
            packed[index] = array
        return packed

    @staticmethod
    def _as_column(vector):
        """Return ``vector`` as a float array, promoting 1-D input to ``(n, 1)``.

        Without this, a 1-D activation added to an ``(n, 1)`` bias would
        silently broadcast into an ``(n, n)`` matrix.
        """
        vector = np.asarray(vector, dtype=float)
        if vector.ndim == 1:
            return vector.reshape(-1, 1)
        return vector

    @staticmethod
    def label_to_vector(label, classes_count):
        """One-hot encode ``label`` as a ``(classes_count, 1)`` column vector.

        Raises:
            ValueError: If ``label`` is outside ``[0, classes_count)``.
        """
        index = int(label)
        if not 0 <= index < classes_count:
            raise ValueError(f"label {label!r} is outside the range [0, {classes_count})")
        vector = np.zeros((classes_count, 1))
        vector[index, 0] = 1.0
        return vector

    def predict(self, input_data: np.ndarray):
        """Run a forward pass and return the index of the strongest output neuron."""
        activation = self._as_column(input_data)
        for w, b in zip(self.weights, self.biases):
            activation = self.act_function(np.dot(w, activation) + b)

        return np.argmax(activation)

    def train(self, training_set: np.ndarray, training_labels: np.ndarray, batch_size: int, theta: float = 1.0,
              epochs: int = 3):
        """Fit the network with shuffled mini-batch gradient descent.

        Args:
            training_set: Indexable collection of samples; ``training_set[j]``
                is one input vector.
            training_labels: Indexable collection of integer class labels,
                aligned with ``training_set``.
            batch_size: Number of samples per parameter update (>= 1).
            theta: Learning rate.
            epochs: Number of passes over the whole training set.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        total_size = len(training_set)
        indices = np.arange(total_size)
        weights_count = len(self.weights)

        for _ in range(epochs):
            np.random.shuffle(indices)

            for start in range(0, total_size, batch_size):
                batch_i = indices[start: start + batch_size]

                total_d_weights = [np.zeros(w.shape) for w in self.weights]
                total_d_biases = [np.zeros(b.shape) for b in self.biases]

                for j in batch_i:
                    expected = NeuralNet.label_to_vector(training_labels[j], self.last_layer_c)
                    delta_weights, delta_biases = self.back_propagate(training_set[j], expected)

                    for k in range(weights_count):
                        total_d_weights[k] += delta_weights[k]
                        total_d_biases[k] += delta_biases[k]

                # Average over the samples actually in this batch; the final
                # batch of an epoch can be shorter than batch_size.
                step = theta / len(batch_i)
                for k in range(weights_count):
                    self.weights[k] -= step * total_d_weights[k]
                    self.biases[k] -= step * total_d_biases[k]

    def back_propagate(self, input_val: np.ndarray, expected_val: np.ndarray):
        """Compute the cost gradient for a single sample.

        Args:
            input_val: Input vector for the sample.
            expected_val: Target output vector for the sample.

        Returns:
            A ``(delta_weights, delta_biases)`` pair of lists whose entries
            have the same shapes as ``self.weights`` / ``self.biases``.
        """
        activation = self._as_column(input_val)
        expected_val = self._as_column(expected_val)

        # Forward pass, remembering every pre-activation and activation.
        results = []
        activation_results = [activation]
        for weight, bias in zip(self.weights, self.biases):
            res = np.dot(weight, activation) + bias
            results.append(res)
            activation = self.act_function(res)
            activation_results.append(activation)

        layer_count = len(self.weights)
        delta_weights = [None] * layer_count
        delta_biases = [None] * layer_count

        # Error at the output layer.
        delta = self.derivative_cost_function(activation_results[-1], expected_val) * self.act_derivative(results[-1])
        delta_weights[-1] = np.dot(delta, activation_results[-2].transpose())
        delta_biases[-1] = delta

        # Walk backwards: layer k's error is derived from layer k + 1's error.
        # activation_results[k] is the input that fed weights[k].
        for k in range(layer_count - 2, -1, -1):
            delta = np.dot(self.weights[k + 1].transpose(), delta) * self.act_derivative(results[k])
            delta_weights[k] = np.dot(delta, activation_results[k].transpose())
            delta_biases[k] = delta

        return delta_weights, delta_biases

    @staticmethod
    def cost_function(output_val: np.ndarray, expected_val: np.ndarray):
        """Return the squared error summed over outputs, divided by ``2 * expected_val.shape[0]``."""
        return np.sum((output_val - expected_val) ** 2) / (2 * expected_val.shape[0])

    @staticmethod
    def derivative_cost_function(output_val: np.ndarray, expected_val: np.ndarray):
        """Return ``output_val - expected_val``.

        This is the gradient of ``0.5 * sum((output - expected) ** 2)``; unlike
        ``cost_function`` it is not divided by the number of outputs.
        """
        return output_val - expected_val

    @staticmethod
    def sigmoid(x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1.0 / (1.0 + np.exp(-x))

    @staticmethod
    def derivative_sigmoid(x):
        """Element-wise derivative of the logistic function evaluated at ``x``."""
        sigmoid = NeuralNet.sigmoid(x)
        return sigmoid * (1 - sigmoid)

