In [1]:
import numpy as np
import pandas as pd 
from tqdm import tqdm
from sklearn.metrics import confusion_matrix


In [3]:
# Directory holding the assignment CSVs. Kept in one place so the notebook can
# be pointed at another machine's copy by editing a single line.
DATA_DIR = '/Users/wenqingz/Desktop/CodingAssignment5'

# Read the train and test splits; each file is expected to contain a 'Y' label
# column (it is dropped/selected by name in the next cell).
train_data = pd.read_csv(f'{DATA_DIR}/coding5_train.csv')
test_data = pd.read_csv(f'{DATA_DIR}/coding5_test.csv')

In [4]:
# Feature matrices: every column except the label column 'Y'.
X_train = train_data.drop(columns=['Y']).to_numpy()
X_test = test_data.drop(columns=['Y']).to_numpy()

# Binary labels: np.sign(Y - 5.5) gives +1 where Y > 5.5, -1 where Y < 5.5
# (and 0 only if a label equals 5.5 exactly).
Y_train = np.sign(train_data['Y'].to_numpy() - 5.5)
Y_test = np.sign(test_data['Y'].to_numpy() - 5.5)

In [5]:
def pegasos_svm_train(X, y, lambda_reg, epoch, return_intercept=False):
    """
    Train a linear SVM (hinge loss + L2 penalty) with the Pegasos step size.

    Each epoch performs ONE averaged sub-gradient step over the full training
    set: the weights and intercept are held fixed while every sample is
    scored, and the mean update is applied at the end of the epoch. Because
    the parameters do not change inside an epoch, the order in which samples
    are visited cannot affect the result, so no shuffling is done and the
    epoch is computed with vectorized numpy operations.

    Args:
    - X: Training data, array-like of shape (num_samples, num_features)
    - y: Labels, array-like of shape (num_samples,), expected to be encoded
      as -1 / +1
    - lambda_reg: Regularization parameter, must be > 0
    - epoch: Number of passes (averaged update steps) over the training data
    - return_intercept: If True, also return the learned intercept. Defaults
      to False so existing callers keep receiving only the weight vector.

    Returns:
    - beta: The learned weights, a numpy array of shape (num_features,)
    - (beta, alpha) if return_intercept is True, where alpha is the learned
      intercept as a float. The decision value for a row x is
      np.dot(beta, x) + alpha.

    Raises:
    - ValueError: if X is not 2-D, X has no rows, y does not have one label
      per row of X, or lambda_reg is not positive.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (num_samples, num_features)")
    num_samples, num_features = X.shape
    if num_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape != (num_samples,):
        raise ValueError("y must have shape (num_samples,)")
    if lambda_reg <= 0:
        raise ValueError("lambda_reg must be positive")

    beta = np.zeros(num_features)
    alpha = 0.0
    for t in range(1, epoch + 1):
        # Pegasos learning-rate schedule: shrinks as 1/t.
        eta = 1.0 / (lambda_reg * t)

        # Samples inside the margin (y * f(x) < 1) are the only ones that
        # contribute a hinge-loss sub-gradient.
        margins = y * (X @ beta + alpha)
        violators = margins < 1

        # Mean hinge contribution over ALL samples (non-violators add zero).
        hinge_beta = X[violators].T @ y[violators] / num_samples
        hinge_alpha = y[violators].sum() / num_samples

        # The L2 penalty shrinks the weights only; the intercept is not
        # regularized.
        beta = (1.0 - eta * lambda_reg) * beta + eta * hinge_beta
        alpha += eta * hinge_alpha

    if return_intercept:
        return beta, float(alpha)
    return beta

In [6]:
# Training hyperparameters.
epoch = 20
lambda_reg = 0.1
beta = pegasos_svm_train(X_train, Y_train, lambda_reg, epoch)

# Only the weight vector is used here, so the decision value has no intercept
# term.
scores = np.dot(X_test, beta)

# np.sign would emit 0 for a score of exactly zero, which is not a valid class
# label and would add a third row/column to the confusion matrix (breaking the
# index-based error rate computed below). Break ties toward +1 instead.
y_predict = np.where(scores >= 0, 1.0, -1.0)

# Pin the label order so the matrix is always 2x2: index 0 is -1, index 1 is +1.
confusion = confusion_matrix(Y_test, y_predict, labels=[-1, 1])

In [7]:
# Confusion matrix of the test predictions. In scikit-learn's convention rows
# are true labels and columns are predicted labels, in sorted label order.
confusion

array([[288,  12],
       [ 10, 290]])

In [8]:
# Test error rate: the two off-diagonal cells of the 2x2 confusion matrix are
# the misclassified samples; divide by the total number of test samples.
n_misclassified = confusion[0, 1] + confusion[1, 0]
n_misclassified / confusion.sum()

0.03666666666666667