Importarea datelor

In [120]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
            transform=lambda x: np.array(x).flatten(),
            download=True,
            train=is_train)
    
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return np.array(mnist_data), np.array(mnist_labels)


train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

train_X = train_X/255.0
test_X = test_X/255.0

train_Y = np.eye(10)[train_Y]
test_Y = np.eye(10)[test_Y]

Initiaizarea datelor

In [121]:
input_size = 784      
hidden_size = 100    
output_size = 10      
learning_rate = 0.01
epochs = 10
batch_size = 64

np.random.seed(0)
weights_input_hidden = np.random.randn(input_size, hidden_size) * 0.01
weights_hidden_output = np.random.randn(hidden_size, output_size) * 0.01
b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

Functia de activare: sigmoid

In [69]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(y):
    return y * (1 - y)

Softmax

In [70]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

Forword propagation

In [71]:
def forward(X):
    hidden_input = np.dot(X, weights_input_hidden) + b1
    hidden_output = sigmoid(hidden_input)
    output_input = np.dot(hidden_output, weights_hidden_output) + b2
    output_output = softmax(output_input)
    return hidden_output, output_output

Backwords propagation

In [122]:
def backprop(X, y, hidden_output, output_output):
    global weights_input_hidden, weights_hidden_output, b1, b2
    
    output_error = output_output - y 
    
    hidden_error = np.dot(output_error, weights_hidden_output.T)
    hidden_delta = hidden_error * hidden_output * (1 - hidden_output)  
    
    weights_hidden_output_gradient = np.dot(hidden_output.T, output_error)
    weights_input_hidden_gradient = np.dot(X.T, hidden_delta)
    
    b2_output_gradient = np.sum(output_error, axis=0, keepdims=True)
    b1_hidden_gradient = np.sum(hidden_delta, axis=0, keepdims=True)
    
    weights_hidden_output -= learning_rate * weights_hidden_output_gradient
    weights_input_hidden -= learning_rate * weights_input_hidden_gradient
    
    b2 -= learning_rate * b2_output_gradient
    b1 -= learning_rate * b1_hidden_gradient


In [111]:
def backprop2(X, y, hidden_output, output_output):
    global weights_input_hidden, weights_hidden_output
    
    output_error = output_output - y  
          
    hidden_error = np.dot(output_error, weights_hidden_output.T)  
    hidden_delta = hidden_error * hidden_output * (1 - hidden_output) 
    
    weights_hidden_output_gradient = np.dot(hidden_output.T, output_error)  
    weights_input_hidden_gradient = np.dot(X.T, hidden_delta) 

    weights_hidden_output -= learning_rate * weights_hidden_output_gradient
    weights_input_hidden -= learning_rate * weights_input_hidden_gradient


Cross entropy

In [74]:

def cross_entropy(y_pred, y_true):
    epsilon = 1e-12
    y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
    return -np.sum(y_true * np.log(y_pred)) / y_pred.shape[0]

Train

In [119]:
def train_dynamic(X, y, epochs=10, batch_size=100):
    global learning_rate
    n_samples = train_X.shape[0]
    
    history_test_acc = []

    for epoch in range(epochs):    
        permutation = np.random.permutation(n_samples)
        train_X_shuffled = train_X[permutation]
        train_Y_shuffled = train_Y[permutation]
        
        total_loss = 0

        for start in range(0, n_samples, batch_size):
            X_batch = train_X_shuffled[start:start + batch_size]
            y_batch = train_Y_shuffled[start:start + batch_size]
            
            hidden_output, output_output = forward(X_batch)
            backprop(X_batch, y_batch, hidden_output, output_output)
            
            batch_loss = cross_entropy(output_output, y_batch)
            total_loss += batch_loss
        
        test_accuracy = compute_accuracy(test_X, test_Y)
        history_test_acc.append(test_accuracy)
        
        if epoch % 10 == 0:
           if abs(history_test_acc[-1] - np.mean(history_test_acc[-10:-1])) < 0.1:
                learning_rate = learning_rate / 1.1
                print("Learing rate changed")
                
        
        print(f"Epoch {epoch + 1}, Loss: {total_loss:.4f}, Test Accuracy: {test_accuracy:.4f}%")

        
    

def compute_accuracy(X, y):
    _, output = forward(X)
    predictions = np.argmax(output, axis=1)
    labels = np.argmax(y, axis=1)
    return np.mean(predictions == labels) 

train_dynamic(train_X, train_Y, epochs=100)

Epoch 1, Loss: 6.3514, Test Accuracy: 0.98%
Epoch 2, Loss: 6.0238, Test Accuracy: 0.98%
Epoch 3, Loss: 5.6506, Test Accuracy: 0.98%
Epoch 4, Loss: 5.3769, Test Accuracy: 0.98%
Epoch 5, Loss: 5.0726, Test Accuracy: 0.98%
Epoch 6, Loss: 4.8290, Test Accuracy: 0.98%


KeyboardInterrupt: 