In [1]:
import numpy as np

In [2]:
class Layer:
    def __init__(self):
        pass
    
    def forward(self, input):
        pass

    def backward(self, output_gradient, learning_rate):
        pass

In [3]:
class Dense(Layer):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.biases = np.random.randn(output_size, 1)

    def forward(self, input):
        self.input = input
        return np.dot(self.weights, self.input) + self.biases

    def backward(self, output_gradient, learning_rate):
        weights_gradient = np.dot(output_gradient, self.input.T)
        self.biases -= learning_rate * output_gradient
        self.weights -= learning_rate * weights_gradient
        return np.dot(self.weights.T, output_gradient)

In [4]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

In [5]:
def predict(network, input):
    output = input
    for layer in network:
        #print(f'output.shape {output.shape}, layer {layer}')
        output = layer.forward(output)
    return output

def train(network, loss, loss_prime, x_train, y_train, epochs = 5000, learning_rate = 0.01, interval=100, verbose = True):
    for e in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            # forward
            output = predict(network, x)
            # error
            error += loss(y, output)

            # backward
            grad = loss_prime(y, output)
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)

        error /= len(x_train)
        if verbose and (e + 1) % interval == 0:
            print(f"{e + 1}/{epochs}, error={error}")
    
    print(f"{epochs}/{epochs}, error={error}")

In [6]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        return np.multiply(output_gradient, self.activation_prime(self.input))

class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)

        def tanh_prime(x):
            return 1 - np.tanh(x) ** 2

        super().__init__(tanh, tanh_prime)

In [7]:
network = [
    Dense(7,64),
    Tanh(),
    Dense(64,10),
    Tanh(),
]

In [8]:
x0 = [1, 1, 1,0 ,1 ,1, 1] #0
x1 = [0, 0, 1,0 ,0 ,1, 0] #1
x2 = [1, 0, 1,1 ,1 ,0, 1] #2
x3 = [1, 0, 1,1 ,0 ,1, 1] #3
x4 = [0, 1, 1,1 ,0 ,1, 0] #4
x5 = [1, 1, 0,1 ,0 ,1, 1] #5
x6 = [1, 1, 0,1 ,1 ,1, 1] #6
x7 = [1, 0, 1,0 ,0 ,1, 0] #7
x8 = [1, 1, 1,1 ,1 ,1, 1] #8
x9 = [1, 1, 1,1 ,0 ,1, 1] #9
x_train = [x0, x1, x2, x3, x4, x5, x6, x7, x8, x9]
y_train = [[1 if i == j else 0 for i in range(10)] for j in range(10)]

x_train = np.array(x_train).reshape(10, 7, 1)
y_train = np.array(y_train).reshape(10, 10, 1)

In [9]:
train(network, mse, mse_prime, x_train, y_train, epochs = 1200000, learning_rate = 0.01, interval=10000, verbose = True)

10000/1200000, error=0.3799310500434433
20000/1200000, error=0.36002193965447943
30000/1200000, error=0.36001067809042775
40000/1200000, error=0.36000617250672734
50000/1200000, error=0.3600034730188052
60000/1200000, error=0.3600014520827742
70000/1200000, error=0.3599996322911302
80000/1200000, error=0.3599976578622551
90000/1200000, error=0.35999501246330656
100000/1200000, error=0.3599903273263626
110000/1200000, error=0.359976153361745
120000/1200000, error=0.34999216755240276
130000/1200000, error=0.17016327038471835
140000/1200000, error=0.17003104134022987
150000/1200000, error=0.17001541772297282
160000/1200000, error=0.17000684755154422
170000/1200000, error=0.16999277182003972
180000/1200000, error=0.16000201330853134
190000/1200000, error=0.1599896724772642
200000/1200000, error=0.10004870068851619
210000/1200000, error=0.1000225891058516
220000/1200000, error=0.10001457489892011
230000/1200000, error=0.10001032686156215
240000/1200000, error=0.10000743122778477
250000/1200

In [13]:
x_input = np.array(x1).reshape(7,1)
output = predict(network, x_input)
print(f'predict x is :{ np.argmax(output)}')

predict x is :1
