In [48]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform=lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)
    
    mnist_data = []
    mnist_labels = []
    
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    
    return np.array(mnist_data), np.array(mnist_labels)

train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

print(train_Y)


[5 0 4 ... 5 6 8]


Step 1: Normalize the data

In [3]:
train_x = train_X / 255.0
train_y = train_Y / 255.0

Step 2: One hot encode the labels

In [4]:
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]

test_y_one_hot = one_hot_encode(test_Y, 10)
train_Y_one_hot = one_hot_encode(train_Y, 10)

Step 3: softmax function

In [5]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=-1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=-1, keepdims=True)

Step 4: forward propagation

In [19]:
def forward_propagation(input, weight, bias):
    z = np.dot(input, weight) + bias
    # return np.where(z >= 0, 1, 0)
    return softmax(z)

Step 5: backwords propagation

In [45]:
def backward_propagation(input, label, label_predict, weight, bias, learning_rate):
    
    error = label - label_predict
    
    weight += np.dot(input.T, error) * learning_rate
    bias += np.sum(error , axis=0) * learning_rate
    # bias += label * learning_rate
    return weight, bias

step 6: calcularea acuratetei

In [8]:
def accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

Step 7: antrenarea modelului

In [47]:
def train_model(train_x, train_y, test_x, test_y, epochs = 50, batch_size = 100, learning_rate = 0.05):
    np.random.seed(42)
    
    weight = np.random.randn(train_x.shape[1], 10) * 0.01
    bias = np.zeros((10 ,))
    
    best_weight, best_bias = None, None 
    best_accuracy = 0
    for epoch in range(epochs):
        permutation = np.random.permutation(train_x.shape[0])
        train_x_shufle = train_x[permutation]
        train_y_shufle = train_y[permutation]
        
        for i in range(0, train_x_shufle.shape[0], batch_size):
            x_batch = train_x_shufle[i:i+batch_size]
            y_batch = train_y_shufle[i:i+batch_size]
            
            y_pred = forward_propagation(x_batch, weight, bias)
            weight, bias = backward_propagation(x_batch, y_batch, y_pred, weight, bias, learning_rate)
            
        y_train_pred = forward_propagation(train_X, weight, bias)
        train_acc = accuracy(train_Y_one_hot, y_train_pred)
        
        y_test_pred = forward_propagation(test_x, weight, bias)
        test_acc = accuracy(test_y_one_hot, y_test_pred)
        
        if test_acc > best_accuracy:
            best_accuracy = test_acc
            best_weight = np.copy(weight) 
            best_bias = np.copy(bias)
            
        print(f'Epoch {epoch+1}/{epochs}, Train Accuracy: {train_acc:.4f}, Test Accuracy: {test_acc:.4f}')
    
    print(f'Best Accuracy on test data: {best_accuracy:.4f}')
    y_test_pred = forward_propagation(test_x, best_weight, best_bias)
    test_acc = accuracy(test_y_one_hot, y_test_pred)
    
    print(f"proba:  {test_acc:.4f}")
   
    print(f'best weight: {best_weight}')

train_model(train_X, train_Y_one_hot, test_X, test_y_one_hot, epochs=10, batch_size=100, learning_rate=0.05)


Epoch 1/10, Train Accuracy: 0.8223, Test Accuracy: 0.8275
Epoch 2/10, Train Accuracy: 0.8864, Test Accuracy: 0.8786
Epoch 3/10, Train Accuracy: 0.8831, Test Accuracy: 0.8855
Epoch 4/10, Train Accuracy: 0.9079, Test Accuracy: 0.9071
Epoch 5/10, Train Accuracy: 0.8416, Test Accuracy: 0.8371
Epoch 6/10, Train Accuracy: 0.9014, Test Accuracy: 0.8982
Epoch 7/10, Train Accuracy: 0.8459, Test Accuracy: 0.8434
Epoch 8/10, Train Accuracy: 0.9034, Test Accuracy: 0.8974
Epoch 9/10, Train Accuracy: 0.8727, Test Accuracy: 0.8712
Epoch 10/10, Train Accuracy: 0.9164, Test Accuracy: 0.9093
Best Accuracy on test data: 0.9093
proba:  0.9093
best weight: [[ 0.00496714 -0.00138264  0.00647689 ...  0.00767435 -0.00469474
   0.0054256 ]
 [-0.00463418 -0.0046573   0.00241962 ...  0.00314247 -0.00908024
  -0.01412304]
 [ 0.01465649 -0.00225776  0.00067528 ...  0.00375698 -0.00600639
  -0.00291694]
 ...
 [ 0.00577072 -0.00178023  0.00155155 ... -0.00896368 -0.00448696
   0.00234259]
 [ 0.00598996  0.00853349 -