In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import gzip
import itertools

In [2]:
def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs.

    For items s0, s1, s2, ... the result yields (s0, s1), (s1, s2), ...
    An input with fewer than two items yields nothing.
    """
    leading, trailing = itertools.tee(iterable)
    # Advance the second copy by one item so the two copies line up as neighbours.
    next(trailing, None)
    return zip(leading, trailing)

In [3]:
def extract_data(filename, num_images, IMAGE_WIDTH):
    """Read `num_images` square images from a gzip-compressed file.

    The first 16 bytes of the decompressed stream are discarded (presumably
    the IDX image header -- verify against the file format), then
    ``IMAGE_WIDTH * IMAGE_WIDTH * num_images`` bytes are read as unsigned
    8-bit values.

    Returns:
        float32 array of shape ``(num_images, IMAGE_WIDTH * IMAGE_WIDTH)``,
        one flattened image per row.
    """
    print('Extracting', filename)
    pixels_per_image = IMAGE_WIDTH * IMAGE_WIDTH
    with gzip.open(filename) as stream:
        stream.read(16)
        raw = stream.read(pixels_per_image * num_images)
    pixels = np.frombuffer(raw, dtype=np.uint8).astype(np.float32)
    return pixels.reshape(num_images, pixels_per_image)
    
    
def extract_labels(filename, num_images):
    """Read `num_images` one-byte labels from a gzip-compressed file.

    The first 8 bytes of the decompressed stream are discarded (presumably
    the IDX label header -- verify against the file format); each following
    byte is one label.

    Returns:
        1-D int64 array with one entry per label read.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        stream.read(8)
        raw = stream.read(1 * num_images)
        return np.frombuffer(raw, dtype=np.uint8).astype(np.int64)

In [4]:
# Load the first `m` images (flattened 28x28) together with their labels.
# NOTE(review): the evaluation cell further down reads the same two files,
# so the samples loaded here are also part of the evaluation set.
m = 2000
X = extract_data(filename='t10k-images-idx3-ubyte.gz', num_images=m, IMAGE_WIDTH=28)
Y = extract_labels(filename='t10k-labels-idx1-ubyte.gz', num_images=m)
Y = Y.reshape(m, 1)  # one label per row

Extracting t10k-images-idx3-ubyte.gz
Extracting t10k-labels-idx1-ubyte.gz


In [5]:
# Normalise the data: subtract the mean, then divide by the standard deviation.
# NOTE(review): both statistics are truncated to integers by int(), so the
# result is only approximately zero-mean / unit-variance, and a standard
# deviation below 1 would turn the divisor into 0.
mean_shift = int(np.mean(X))
X -= mean_shift
std_scale = int(np.std(X))  # measured after the mean has been removed
X /= std_scale
X = X.reshape(len(X), 28, 28)  # view every sample as a 28x28 picture

In [6]:
# Replace every digit label by its parity, in place: 0 = even, 1 = odd.
# A single vectorised remainder replaces the per-row loop, which compared a
# scalar with the list [0] and only worked through the truthiness of the
# resulting one-element array.
Y %= 2

print(Y)

[[1]
 [0]
 [1]
 ...
 [1]
 [1]
 [1]]


In [7]:
class Utils:
    """Stateless activation and loss helpers built on NumPy."""

    @staticmethod
    def sigmoid(X):
        """Logistic function ``1 / (1 + exp(-X))``, applied element-wise."""
        return 1 / (1 + np.exp(-X))

    @staticmethod
    def deriv_sigmoid(X):
        """Derivative of the logistic function at X: ``s(X) * (1 - s(X))``."""
        fx = Utils.sigmoid(X)
        return fx * (1 - fx)

    @staticmethod
    def mse_loss(Y, theta):
        """Mean of the squared differences between Y and theta."""
        return ((Y - theta) ** 2).mean()

    @staticmethod
    def softmax(X):
        """Exponentiate X and normalise by the sum over ALL elements.

        The maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but prevents ``exp`` from overflowing
        to ``inf`` (and the quotient from becoming NaN) for large inputs.
        Empty input is passed through unshifted because ``np.max`` rejects it.
        """
        shifted = np.subtract(X, np.max(X)) if np.size(X) else X
        out = np.exp(shifted)
        return out / np.sum(out)

    @staticmethod
    def cross_entropy_loss(Y, theta):
        """Negative sum of ``Y * log(theta)``.

        theta must be strictly positive: ``log(0)`` is ``-inf`` and
        ``0 * -inf`` is NaN.
        """
        return -np.sum(Y * np.log(theta))

In [8]:
class Network():
    """Hand-rolled feed-forward network trained one sample at a time.

    State is kept in three parallel lists with one entry per layer:

    * ``theta[i]``   -- column vector holding the activations of layer i
      (overwritten by every call to ``forward``);
    * ``weights[i]`` -- weight matrix of layer i;
    * ``R[i]``       -- column vector added to the weighted sum of each unit
      before the activation function, updated during backpropagation.

    Layer 0 is special: unit k receives row k of the 2-D input sample.

    NOTE(review): for layers j >= 1 the matrix ``weights[j]`` is allocated
    with shape ``(m[j-1], m[j])``. ``forward`` indexes its first axis by the
    units of layer j and sums over a row of length ``m[j]``, so only the
    first ``m[j]`` activations of layer j-1 feed layer j. ``backpropagation``
    indexes the first axis of the same matrix by the units of layer j-1.
    The two methods therefore disagree about the orientation of the matrix;
    this looks unintended -- confirm before relying on the architecture.
    """

    def __init__(self):
        self.iterations = 8  # number of passes over the training data
        self.alpha = 0.2  # learning rate
        self.lamda = 0.5  # scaling factor used by reg_theta
        self.n = 5  # number of layers in the network
        self.m = [28, 14, 7, 2, 1]  # number of units in every layer

    @staticmethod
    def _weighted_sum(row, inputs):
        """Accumulate ``row[z] * inputs[z]`` sequentially over every z in `row`."""
        total = 0
        for z in range(len(row)):
            total += row[z] * inputs[z]
        return total

    def backpropagation(self, deriv_grad_func, y, x, weights, theta, R):
        """Update `weights` and `R` in place for a single sample.

        Args:
            deriv_grad_func: derivative of the activation function.
            y: target of the sample.
            x: 2-D input sample; row k feeds unit k of layer 0.
            weights, theta, R: per-layer lists as described on the class;
                `theta` must hold the activations produced by ``forward``
                for this sample and is only read here.

        The output error is always ``-2 * (y - prediction)``, i.e. the
        derivative of a squared error, regardless of the loss used for
        reporting.
        """
        d_ypred = -2 * (y - theta[-1][0])
        last = len(theta) - 1
        old_w = list()
        for i in range(last, 0, -1):
            if i == last:
                old_w = weights[i]
                for k in range(len(theta[i])):
                    sum_h = self._weighted_sum(weights[i][k], theta[i - 1]) + R[i][k]
                    # The pre-activation is fixed while the row is updated,
                    # so the derivative is evaluated once per unit.
                    slope = deriv_grad_func(sum_h)
                    for z in range(len(weights[i][k])):
                        weights[i][k][z] -= self.alpha * d_ypred * theta[i - 1][z] * slope
                    R[i][k] -= self.alpha * d_ypred * slope
            else:
                # `old_w` aliases the matrix of layer i + 1, which has already
                # been updated in place by the previous iteration.
                present_w = weights[i]
                for k in range(len(theta[i])):
                    new_w = np.sum(old_w[k])
                    upstream = deriv_grad_func(self._weighted_sum(old_w[k], theta[i]))
                    sum_h = self._weighted_sum(weights[i][k], theta[i - 1]) + R[i][k]
                    slope = deriv_grad_func(sum_h)
                    for z in range(len(weights[i][k])):
                        weights[i][k][z] -= self.alpha * d_ypred * new_w * upstream * theta[i - 1][z] * slope
                    R[i][k] -= self.alpha * d_ypred * new_w * upstream * slope
                old_w = present_w
        # Layer 0 takes its inputs from the rows of the sample instead of theta.
        for k in range(len(theta[0])):
            new_w = np.sum(old_w[k])
            upstream = deriv_grad_func(self._weighted_sum(old_w[k], theta[0]))
            sum_h = self._weighted_sum(weights[0][k], x[k]) + R[0][k]
            slope = deriv_grad_func(sum_h)
            for z in range(len(weights[0][k])):
                weights[0][k][z] -= self.alpha * d_ypred * new_w * upstream * x[k][z] * slope
            R[0][k] -= self.alpha * d_ypred * new_w * upstream * slope

    def forward(self, grad_func, x, weights, theta, R):
        """Propagate one sample through the network.

        Every ``theta[i]`` is overwritten in place with the activations of
        layer i. The returned object is ``theta[-1]`` itself (not a copy),
        so a later call overwrites it.
        """
        for k in range(len(theta[0])):
            theta[0][k] = grad_func(self._weighted_sum(weights[0][k], x[k]) + R[0][k])
        for i in range(1, len(theta)):
            for k in range(len(theta[i])):
                theta[i][k] = grad_func(self._weighted_sum(weights[i][k], theta[i - 1]) + R[i][k])
        return theta[-1]

    def reg_theta(self, theta, k):
        """Return ``theta[k]`` scaled by ``lamda / len(theta)``."""
        return (self.lamda / len(theta)) * theta[k]

    def gradient_descent(self, data, y_true, deriv_grad_func=Utils.deriv_sigmoid,
                         grad_func=Utils.sigmoid, loss_func=Utils.mse_loss):
        """Initialise the parameters randomly and train on (data, y_true).

        Every epoch visits the samples in order, running ``forward`` followed
        by ``backpropagation`` for each one, then prints `loss_func` evaluated
        on the predictions collected during that epoch. `loss_func` is used
        for reporting only.

        Returns:
            ``(theta, weights, R)`` -- the per-layer lists to pass to
            ``forward`` for inference.
        """
        theta = [0] * self.n
        weights = [0] * self.n
        R = [0] * self.n
        for j, units in enumerate(self.m):
            theta[j] = np.random.randn(units, 1)
            R[j] = np.full(units, 0.).reshape(-1, 1)

        # The first dimension equals the dimension of X (28 rows per sample).
        weights[0] = np.random.uniform(-0.5, 0.5, size=(28, self.m[0]))
        for j, (z1, z2) in enumerate(pairwise(self.m), start=1):
            weights[j] = np.random.uniform(-0.5, 0.5, size=(z1, z2))

        # Seed every R entry from the random initial theta via reg_theta.
        for i in range(len(theta)):
            for k in range(len(theta[i])):
                R[i][k] = self.reg_theta(theta[i], k)

        for iteration in tqdm(range(self.iterations)):  # parameter updates
            # Predictions are gathered in a list and converted once per epoch
            # instead of re-allocating an array for every sample.
            epoch_preds = []
            for x, y in zip(data, y_true):
                new_y_pred = self.forward(grad_func, x, weights, theta, R)
                self.backpropagation(deriv_grad_func, y, x, weights, theta, R)
                # Indexing down to the scalar copies the value; `new_y_pred`
                # aliases theta[-1] and is overwritten by the next forward().
                epoch_preds.append(new_y_pred[0][0])
            y_pred = np.array(epoch_preds).reshape(-1, 1)
            loss = loss_func(y_true[:len(y_pred)], y_pred)
            print("Epoch %d loss: %.3f" % (iteration + 1, loss))

        return theta, weights, R

In [9]:
# Fix NumPy's global seed so that the random parameter initialisation inside
# gradient_descent -- and therefore the reported losses -- is reproducible
# after a kernel restart.
np.random.seed(42)
network = Network()
theta, weights, R = network.gradient_descent(X, Y)

 12%|█▎        | 1/8 [00:43<05:06, 43.76s/it]

Epoch 1 loss: 0.255


 25%|██▌       | 2/8 [01:25<04:19, 43.27s/it]

Epoch 2 loss: 0.194


 38%|███▊      | 3/8 [02:08<03:35, 43.11s/it]

Epoch 3 loss: 0.135


 50%|█████     | 4/8 [02:51<02:51, 42.98s/it]

Epoch 4 loss: 0.124


 62%|██████▎   | 5/8 [03:34<02:08, 42.91s/it]

Epoch 5 loss: 0.121


 75%|███████▌  | 6/8 [04:16<01:25, 42.80s/it]

Epoch 6 loss: 0.118


 88%|████████▊ | 7/8 [04:59<00:42, 42.82s/it]

Epoch 7 loss: 0.118


100%|██████████| 8/8 [05:41<00:00, 42.74s/it]

Epoch 8 loss: 0.119





In [14]:
# Load and prepare the evaluation set with the same steps as the training data.
# NOTE(review): these are the same files the 2000 training samples were read
# from, so the evaluation set contains the training samples.
_max = 10000
X_test = extract_data('t10k-images-idx3-ubyte.gz', _max, 28)
Y_test = extract_labels('t10k-labels-idx1-ubyte.gz', _max).reshape(_max,1)

# Normalise with the integer-truncated mean and standard deviation of X_test.
X_test -= int(np.mean(X_test))
X_test /= int(np.std(X_test))
X_test = X_test.reshape(len(X_test), 28, 28)
# Replace every digit label by its parity, in place: 0 = even, 1 = odd.
# A single vectorised remainder replaces the per-row loop, which compared a
# scalar with the list [0].
Y_test %= 2

Extracting t10k-images-idx3-ubyte.gz
Extracting t10k-labels-idx1-ubyte.gz


In [15]:
# Count the evaluation samples whose prediction matches the parity label.
# An output above 0.1 is read as "odd" (label 1) and below 0.1 as "even"
# (label 0); an output of exactly 0.1 is never counted as correct.
# NOTE(review): 0.1 is an unusual cut-off for a sigmoid output -- confirm it
# is intentional.
accuary = 0
for image, label_row in zip(X_test, Y_test):
    res = network.forward(Utils.sigmoid, image, weights, theta, R)
    output = res[0][0]
    label = label_row[0]
    hit_odd = label == 1 and output > 0.1
    hit_even = label == 0 and output < 0.1
    if hit_odd or hit_even:
        accuary += 1
print(accuary)
print("Overall Accuracy: %.2f" % (float(accuary/len(X_test)*100)))

7581
Overall Accuracy: 75.81
