In [2]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools

In [3]:
def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs of ``iterable``.

    ``pairwise([a, b, c])`` yields ``(a, b)`` then ``(b, c)``. An input with
    fewer than two items yields nothing.
    """
    leading, trailing = itertools.tee(iterable, 2)
    # Move the second copy one step ahead so the two copies are offset by one.
    next(trailing, None)
    return zip(leading, trailing)

In [49]:
class Utils:
    """Hyper-parameters and numeric helpers for a small layered network.

    The instance stores the training settings; the static methods provide
    activation and loss functions; ``forward``, ``backpropagation`` and
    ``gradient_descent`` operate on three parallel per-layer lists:

    * ``weights[i][k]`` - weight row of neuron ``k`` in layer ``i``;
    * ``theta[i][k]``   - activation of neuron ``k`` in layer ``i``;
    * ``R[i][k]``       - additive term of neuron ``k`` in layer ``i``
      (added to the weighted sum before the activation function).
    """

    def __init__(self):
        self.iterations = 100  # number of gradient-descent iterations (epochs)
        self.alpha = 0.1  # learning rate
        self.lamda = 0.5  # regularization parameter
        self.n = 4  # number of layers in the network
        self.m = [28, 3, 3, 1]  # number of neurons in each layer

    @staticmethod
    def sigmoid(X):
        """Return the logistic function ``1 / (1 + exp(-X))`` element-wise."""
        return 1 / (1 + np.exp(-X))

    @staticmethod
    def deriv_sigmoid(X):
        """Return the derivative of the logistic function evaluated at ``X``."""
        # The call must be qualified: class attributes are not in scope inside
        # a method body, so a bare ``sigmoid`` raises NameError.
        fx = Utils.sigmoid(X)
        return fx * (1 - fx)

    @staticmethod
    def mse_loss(Y, theta):
        """Return the mean of the squared differences between ``Y`` and ``theta``."""
        return ((Y - theta) ** 2).mean()

    @staticmethod
    def softmax(X):
        """Return ``exp(X) / sum(exp(X))`` computed over all elements of ``X``."""
        values = np.asarray(X)
        # Subtracting the maximum leaves the result mathematically unchanged
        # but keeps exp() from overflowing to inf (which produced NaN).
        shift = np.max(values) if values.size else 0
        out = np.exp(values - shift)
        return out / np.sum(out)

    @staticmethod
    def cross_entropy_loss(Y, theta):
        """Return ``-sum(Y * log(theta))``."""
        return -np.sum(Y * np.log(theta))

    @staticmethod
    def _neuron_state(x, weights, theta, R, i, k):
        """Return ``(inputs, pre_activation)`` for neuron ``k`` of layer ``i``.

        Layer 0 reads its inputs from ``x[k]``; every other layer reads the
        activations of the previous layer. Only the first ``len(weights[i][k])``
        inputs take part in the weighted sum.
        """
        w_row = np.asarray(weights[i][k], dtype=float).reshape(-1)
        source = x[k] if i == 0 else theta[i - 1]
        inputs = np.asarray(source, dtype=float).reshape(-1)[:w_row.size]
        bias = np.asarray(R[i][k], dtype=float).item()
        return inputs, float(w_row @ inputs) + bias

    def backpropagation(self, deriv_grad_func, y, x, weights, theta, R):
        """Update ``weights`` and ``R`` in place for one training sample.

        Must be called after ``forward`` so that ``theta`` holds the
        activations of the same sample.

        Args:
            deriv_grad_func: derivative of the activation function.
            y: target value for the sample (the output must be single-valued).
            x: sample; ``x[k]`` is the input vector of first-layer neuron ``k``.
            weights, theta, R: per-layer lists, see the class docstring.

        Raises:
            ValueError: if ``-2 * (y - theta[-1])`` is not a single value.

        NOTE(review): the factor applied to non-output layers (sum of row
        ``k`` of the next layer's matrix times the activation derivative at
        the dot product of that row with this layer's activations) is kept
        exactly as originally written. It does not look like the textbook
        chain rule - confirm the intended formula.
        """
        # Derivative of the squared error with respect to the prediction.
        d_ypred = np.asarray(-2 * (y - theta[-1]), dtype=float).item()
        last = len(theta) - 1
        next_w = None  # pre-update weights of the layer processed just before
        for i in range(last, -1, -1):
            # Take a real copy: the rows of weights[i] are modified below, and
            # the earlier layer must see the values from before this update.
            present_w = np.array(weights[i], dtype=float)
            for k in range(len(theta[i])):
                inputs, sum_h = self._neuron_state(x, weights, theta, R, i, k)
                factor = d_ypred
                if i != last:
                    out_w = next_w[k]
                    acts = np.asarray(theta[i], dtype=float).reshape(-1)[:out_w.size]
                    sum_old_h = float(out_w @ acts)
                    factor *= float(np.sum(out_w)) * deriv_grad_func(sum_old_h)
                # Scalar step shared by the bias-like term and every weight.
                step = float(self.alpha * factor * deriv_grad_func(sum_h))
                for z in range(inputs.size):
                    weights[i][k][z] -= step * inputs[z]
                R[i][k] -= step
            next_w = present_w

    def forward(self, grad_func, x, weights, theta, R):
        """Run one forward pass, storing every activation in ``theta``.

        Args:
            grad_func: activation function applied to each weighted sum.
            x: sample; ``x[k]`` is the input vector of first-layer neuron ``k``.
            weights, theta, R: per-layer lists, see the class docstring.

        Returns:
            ``theta[-1]`` - the output layer's activations. This is the stored
            object itself, not a copy, so the next pass overwrites it.
        """
        for i in range(len(theta)):
            for k in range(len(theta[i])):
                _, sum_h = self._neuron_state(x, weights, theta, R, i, k)
                theta[i][k] = grad_func(sum_h)
        return theta[-1]

    def gradient_descent(self, data, y_true, deriv_grad_func=None,
                         grad_func=None, loss_func=None):
        """Initialise a random network and train it sample by sample.

        Args:
            data: re-iterable sequence of samples (iterated once per epoch).
            y_true: targets, aligned with ``data``.
            deriv_grad_func: activation derivative; defaults to ``deriv_sigmoid``.
            grad_func: activation function; defaults to ``sigmoid``.
            loss_func: loss used for progress reporting; defaults to ``mse_loss``.

        Returns:
            None. The loss is printed every tenth epoch.

        Raises:
            ValueError: if ``self.m`` does not list ``self.n`` positive sizes.
        """
        # Resolve defaults at call time: inside the class body the names refer
        # to staticmethod objects, which are not callable before Python 3.10.
        if deriv_grad_func is None:
            deriv_grad_func = Utils.deriv_sigmoid
        if grad_func is None:
            grad_func = Utils.sigmoid
        if loss_func is None:
            loss_func = Utils.mse_loss

        if len(self.m) != self.n or any(size < 1 for size in self.m):
            raise ValueError("self.m must list self.n positive layer sizes")

        theta = [np.random.randn(size, 1) for size in self.m]

        # Regularization of each theta: R starts as (lamda / layer size) * theta.
        # Built as float arrays so the values are not truncated to integers
        # and can be updated in place during backpropagation.
        R = [(self.lamda / size) * layer for size, layer in zip(self.m, theta)]

        # The first dimension (28) equals the dimensionality of X.
        # NOTE(review): forward() indexes weights[i][k] as the row of neuron k,
        # while the matrices below have shape (previous size, current size), so
        # only the first "current size" inputs of each neuron are used.
        # Confirm the intended layout.
        weights = [np.random.randn(28, self.m[0])]
        weights.extend(np.random.randn(z1, z2) for z1, z2 in pairwise(self.m))

        for iteration in range(self.iterations):  # parameter updates
            predictions = []
            for x, y in zip(data, y_true):
                y_pred = self.forward(grad_func, x, weights, theta, R)
                # Copy: forward() returns theta[-1], overwritten by the next sample.
                predictions.append(np.array(y_pred, dtype=float).reshape(-1))
                self.backpropagation(deriv_grad_func, y, x, weights, theta, R)
            if iteration % 10 == 0 and predictions:
                # Score every sample of the epoch, not only the last prediction.
                count = len(predictions)
                targets = np.asarray(y_true, dtype=float)[:count].reshape(count, -1)
                loss = loss_func(targets, np.vstack(predictions))
                print("Epoch %d loss: %.3f" % (iteration, loss))

        # TODO: decide on a return value; an earlier draft returned
        # (theta[-1], loss_func(y_true, theta[-1])).
    

In [None]:
class Network:
    """Placeholder for the network class; nothing is implemented yet."""

In [6]:
# Scratch check of the weight-initialisation scheme on a small layout.
m = [8, 2, 3, 1]  # neurons per layer
w = [np.random.randn(8, m[0])]  # matrix for the first layer
for z1, z2 in pairwise(m):
    # One matrix per pair of adjacent layer sizes.
    w.append(np.random.randn(z1, z2))
j = len(w)  # number of matrices built

In [10]:
# Display the list of weight matrices built in the previous cell.
w

[array([[-6.65642245e-01,  4.12316271e-01,  3.70166096e-01,
         -6.05833387e-01, -2.49084505e+00,  4.97623854e-01,
          2.80971588e+00, -6.26117224e-01],
        [ 6.79219248e-01,  1.35254801e+00,  1.08972867e+00,
         -1.58512950e+00,  5.31412591e-01, -9.11538278e-01,
          9.86457354e-02,  6.90513701e-01],
        [-1.12758521e+00, -8.72653456e-01,  1.70353701e+00,
          4.83501117e-01, -8.06697869e-02,  1.65100776e-01,
         -1.13886541e-01, -2.51882660e-01],
        [-2.68177174e-01,  9.28912978e-01, -2.81097004e-01,
         -1.52273657e+00,  1.09795046e+00, -6.72331838e-01,
          1.11201469e+00, -1.09565737e-01],
        [ 1.52122643e+00, -1.09253714e+00, -1.78631857e-01,
         -1.86107178e-01,  1.14485692e-01, -8.17598583e-01,
          5.39964178e-03, -6.91309089e-01],
        [ 5.45743108e-01,  1.60404856e+00, -6.53002219e-01,
          1.76528664e+00, -9.19805729e-04, -3.25607178e-01,
          2.18638100e-01, -1.04023705e+00],
        [-4.4043