In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML
mnist = fetch_openml('mnist_784', version=1, parser='auto')

# Extract the feature table and the target column as NumPy arrays
X = mnist.data.to_numpy()
y = mnist.target.to_numpy()

In [None]:
# Cast the labels to integers so they can be used directly as class indices
y = y.astype(int)

# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features with statistics estimated on the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Prepend a constant column of ones so the first weight of each class acts as the bias
X_train = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], axis=1)
X_test = np.concatenate([np.ones((X_test.shape[0], 1)), X_test], axis=1)

# Problem dimensions used to shape the weight matrix (num_classes x num_features)
num_features = X_train.shape[1]
num_classes = 10

In [None]:
# Softmax function
def softmax(z):
    """Compute the row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    z : ndarray of shape (n_samples, n_classes)
        Raw scores (logits). The array is left untouched.

    Returns
    -------
    ndarray of shape (n_samples, n_classes)
        Non-negative values whose rows each sum to 1.
    """
    # Softmax is invariant to a per-row shift, so subtracting the row maximum
    # keeps np.exp from overflowing. Build a new array rather than using `-=`,
    # which would silently overwrite the caller's logits in place.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

# Cross-entropy loss function
def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Mean cross-entropy (negative log-likelihood) of the true classes.

    Parameters
    ----------
    y_true : integer ndarray of shape (n_samples,)
        Class index of every sample; used to pick one column of ``y_pred`` per row.
    y_pred : ndarray of shape (n_samples, n_classes)
        Predicted class probabilities, one row per sample.
    eps : float, default 1e-12
        Lower bound applied to the selected probabilities before taking the
        logarithm, so a probability that underflowed to exactly 0 yields a
        large finite loss instead of ``inf``.

    Returns
    -------
    float
        Average of ``-log(p_true)`` over all samples.
    """
    # Probability assigned to the correct class of each sample
    p_true = y_pred[np.arange(len(y_true)), y_true]
    # Clip from below only; probabilities >= eps are left exactly as they are
    return -np.mean(np.log(np.clip(p_true, eps, None)))

In [None]:
# Stochastic Gradient Descent
def SGD(weights, learning_rate, batch_size, num_iter=1000, seed=42):
    """Train softmax regression with mini-batch stochastic gradient descent.

    Uses the notebook-level ``X_train``/``y_train`` arrays for training and
    ``X_test``/``y_test`` for the final evaluation, whose accuracy is printed.

    Parameters
    ----------
    weights : ndarray of shape (num_classes, num_features)
        Initial weight matrix. It is copied, so the caller's array is not modified.
    learning_rate : float
        Step size applied to every gradient update.
    batch_size : int
        Number of training samples drawn (with replacement) per iteration.
    num_iter : int, default 1000
        Number of gradient updates to perform.
    seed : int or None, default 42
        Seed of the mini-batch sampler; a fixed value makes runs reproducible.
        Pass ``None`` for a different sample sequence on every call.

    Returns
    -------
    acc_trace : ndarray of shape (num_iter // 20,)
        Training accuracy measured after iterations 20, 40, 60, ...
    """
    eval_every = 20

    # save train accuracy
    acc_trace = np.zeros(num_iter // eval_every)

    # Work on a float copy so in-place updates never leak into the caller's array
    weights = np.array(weights, dtype=float)
    rng = np.random.default_rng(seed)
    n_train = X_train.shape[0]

    for i in range(num_iter):

        #############################
        # Draw a random mini-batch
        batch_idx = rng.integers(0, n_train, size=batch_size)
        X_batch = X_train[batch_idx]
        y_batch = y_train[batch_idx]

        # Gradient of the mean cross-entropy w.r.t. the weights:
        # (softmax(X W^T) - one_hot(y))^T X / batch_size
        delta = softmax(np.dot(X_batch, weights.T))
        delta[np.arange(batch_size), y_batch] -= 1.0
        grad = np.dot(delta.T, X_batch) / batch_size

        # Plain SGD step
        weights -= learning_rate * grad
        ##############################

        if (i + 1) % eval_every == 0:

            # Compute accuracy on the full training set
            z_train = np.dot(X_train, weights.T)
            y_pred_train = np.argmax(softmax(z_train), axis=1)
            acc_trace[(i + 1) // eval_every - 1] = accuracy_score(y_train, y_pred_train)

    # Evaluate on the test set
    z_test = np.dot(X_test, weights.T)
    y_pred_test = np.argmax(softmax(z_test), axis=1)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    print(f"Test Accuracy: {accuracy_test:.4f}")

    return acc_trace

In [None]:
# Baseline run: plain SGD from all-zero weights, one sample per step
acc_trace1 = SGD(
    weights=np.zeros((num_classes, num_features)),
    learning_rate=1e-4,
    batch_size=1,
    num_iter=1000,
)

In [None]:
# Stochastic Gradient Descent with momentum
def SGD_withmom(weights, learning_rate, momentum, batch_size, num_iter=1000, seed=42):
    """Train softmax regression with mini-batch SGD plus (heavy-ball) momentum.

    Uses the notebook-level ``X_train``/``y_train`` arrays for training and
    ``X_test``/``y_test`` for the final evaluation, whose accuracy is printed.

    The update performed at every iteration is::

        mom_g   <- momentum * mom_g + grad
        weights <- weights - learning_rate * mom_g

    Parameters
    ----------
    weights : ndarray of shape (num_classes, num_features)
        Initial weight matrix. It is copied, so the caller's array is not modified.
    learning_rate : float
        Step size applied to the momentum buffer at every update.
    momentum : float
        Decay factor of the accumulated gradient; 0 reduces to plain SGD.
    batch_size : int
        Number of training samples drawn (with replacement) per iteration.
    num_iter : int, default 1000
        Number of gradient updates to perform.
    seed : int or None, default 42
        Seed of the mini-batch sampler; a fixed value makes runs reproducible.
        Pass ``None`` for a different sample sequence on every call.

    Returns
    -------
    acc_trace : ndarray of shape (num_iter // 20,)
        Training accuracy measured after iterations 20, 40, 60, ...
    """
    eval_every = 20

    # save train accuracy
    acc_trace = np.zeros(num_iter // eval_every)

    # Work on a float copy so in-place updates never leak into the caller's array
    weights = np.array(weights, dtype=float)
    rng = np.random.default_rng(seed)
    n_train = X_train.shape[0]

    # initialize momentum
    mom_g = np.zeros_like(weights)
    for i in range(num_iter):

        #############################
        # Draw a random mini-batch
        batch_idx = rng.integers(0, n_train, size=batch_size)
        X_batch = X_train[batch_idx]
        y_batch = y_train[batch_idx]

        # Gradient of the mean cross-entropy w.r.t. the weights:
        # (softmax(X W^T) - one_hot(y))^T X / batch_size
        delta = softmax(np.dot(X_batch, weights.T))
        delta[np.arange(batch_size), y_batch] -= 1.0
        grad = np.dot(delta.T, X_batch) / batch_size

        # Accumulate the gradient into the momentum buffer, then step along it
        mom_g = momentum * mom_g + grad
        weights -= learning_rate * mom_g
        ##############################

        if (i + 1) % eval_every == 0:

            # Compute accuracy on the full training set
            z_train = np.dot(X_train, weights.T)
            y_pred_train = np.argmax(softmax(z_train), axis=1)
            acc_trace[(i + 1) // eval_every - 1] = accuracy_score(y_train, y_pred_train)

    # Evaluate on the test set
    z_test = np.dot(X_test, weights.T)
    y_pred_test = np.argmax(softmax(z_test), axis=1)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    print(f"Test Accuracy: {accuracy_test:.4f}")

    return acc_trace

In [None]:
# Same setup as the baseline run, but with a momentum coefficient of 0.5
acc_trace2 = SGD_withmom(
    weights=np.zeros((num_classes, num_features)),
    learning_rate=1e-4,
    momentum=0.5,
    batch_size=1,
    num_iter=1000,
)

In [None]:
fig, ax = plt.subplots(figsize=(12, 8))

# Plot both training-accuracy curves on linear axes.
# Accuracy is recorded after iterations 20, 40, 60, ..., so entry k of a trace
# belongs at x = 20 * (k + 1); starting the axis at 0 would shift every point
# one evaluation interval to the left.
for trace, label in (
    (acc_trace1, 'batch=1, lr=1e-4, no momentum'),
    (acc_trace2, 'batch=1, lr=1e-4, momentum=0.5'),
):
    ax.plot(20 * np.arange(1, len(trace) + 1), trace, linewidth=2, label=label)

# Set labels and title
ax.set_xlabel('Number of iteration')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy v.s. Number of iteration')

# Add a legend
ax.legend()

# Display the plot
plt.show()