## Problem 4 - Implement SVM with SMO Solver:
SMO algo solves the SVM optimization problem by breaking it into smallest possible sub problems.

Dual SVM Problem:
$$
max \sum_{i=1}^n \alpha_i - \frac{1}{2} \sum_{i,j=1}^n \alpha_i \alpha_j y_i y_j K(x_i, x_j)
$$

under following conditions:
- $ 0 <= \alpha_i <= C $ for all i
- $ \sum_{i=1}^n \alpha_i y_i = 0 $

Idea: instead of solving for all alpha values at once - SMO
1. Picks two alpha values at a time ($ \alpha_i , \alpha_j $)
2. optimizes this pair and keeps others fixed
3. repeats the process with other $ \alpha $

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

def simplified_SMO(X, y, C, tol, max_passes, seed=42):
    np.random.seed(seed)
    m, n = X.shape
    alpha = np.zeros(m)
    b = 0
    passes = 0

    while passes < max_passes:
        num_changed_alphas = 0

        for i in range(m):
            # Calculate Ei = f(x(i)) - y(i)
            f_xi = np.sum(alpha * y * np.dot(X, X[i])) + b
            E_i = f_xi - y[i]

            # if ((y(i)*Ei < -tol && αi < C) || (y(i)*Ei > tol && αi > 0))
            if (y[i] * E_i < -tol and alpha[i] < C) or (y[i] * E_i > tol and alpha[i] > 0):

                # random j
                j = i
                while j == i:
                    j = np.random.randint(0, m)

                # Calculate Ej = f(x(j)) - y(j)
                f_xj = np.sum(alpha * y * np.dot(X, X[j])) + b
                E_j = f_xj - y[j]

                # Save old alphas:
                alpha_i_old = alpha[i]
                alpha_j_old = alpha[j]

                # Compute L and H
                if y[i] != y[j]:
                    L = max(0, alpha[j] - alpha[i])
                    H = min(C, C + alpha[j] - alpha[i])
                else:
                    L = max(0, alpha[i] + alpha[j] - C)
                    H = min(C, alpha[i] + alpha[j])


                if L == H:
                    continue

                # Compute η by (14)
                K_ii = np.dot(X[i], X[i])
                K_jj = np.dot(X[j], X[j])
                K_ij = np.dot(X[i], X[j])
                eta = 2 * K_ij - K_ii - K_jj

                # if (η >= 0) continue to next i
                if eta >= 0:
                    continue

                alpha_j_new = alpha[j] - y[j] * (E_i - E_j) / eta

                if alpha_j_new > H:
                    alpha[j] = H
                elif alpha_j_new < L:
                    alpha[j] = L
                else:
                    alpha[j] = alpha_j_new

                # if (|αj - αj_old| < 10^-5) continue to next i
                if abs(alpha[j] - alpha_j_old) < 1e-5:
                    continue

                # Determine value for αi 
                alpha[i] = alpha_i_old + y[i] * y[j] * (alpha_j_old - alpha[j])

                # Compute b1 and b2 
                b1 = b - E_i - y[i] * (alpha[i] - alpha_i_old) * K_ii - \
                     y[j] * (alpha[j] - alpha_j_old) * K_ij

                b2 = b - E_j - y[i] * (alpha[i] - alpha_i_old) * K_ij - \
                     y[j] * (alpha[j] - alpha_j_old) * K_jj

                # Compute b 
                if 0 < alpha[i] < C:
                    b = b1
                elif 0 < alpha[j] < C:
                    b = b2
                else:
                    b = (b1 + b2) / 2

                # num_changed_alphas := num_changed_alphas + 1
                num_changed_alphas = num_changed_alphas + 1

        if num_changed_alphas == 0:
            break
        else:
            passes = passes + 1

    return alpha, b

# wrapper ->
class SimplifiedSMO:
    def __init__(self, C=1.0, tol=0.001, max_passes=100, seed=42):
        self.support_vectors = None
        self.b = None
        self.alpha = None
        self.y_train = None
        self.X_train = None
        self.C = C
        self.tol = tol
        self.max_passes = max_passes

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
        self.alpha, self.b = simplified_SMO(X, y, self.C, self.tol, self.max_passes)
        self.support_vectors = np.where(self.alpha > 1e-5)[0]
        print(f"Found {len(self.support_vectors)} support vectors")

    def predict(self, X):
        predictions = np.zeros(X.shape[0])
        for j in range(X.shape[0]):
            pred = 0
            for i in range(self.X_train.shape[0]):
                if self.alpha[i] > 0:
                    pred += self.alpha[i] * self.y_train[i] * np.dot(self.X_train[i], X[j])
            predictions[j] = pred + self.b
        return np.sign(predictions)

    def score(self, X, y):
        predictions = self.predict(X)
        return np.mean(predictions == y)

In [2]:
data = np.loadtxt("spambase/spambase.data", delimiter=",")
X = data[:, :-1]
y = data[:, -1]

y = 2 * y - 1

scaler_spam = StandardScaler()
X_normalized = scaler_spam.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

print("Initial Training SVM with SMO for C = 1.0 ...")
smo = SimplifiedSMO(C=1.0, tol=0.001, max_passes=100)
smo.fit(X_train, y_train)

train_accuracy = smo.score(X_train, y_train)
test_accuracy = smo.score(X_test, y_test)

print(f"\nResults:")
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Initial Training SVM with SMO for C = 1.0 ...
Found 1169 support vectors

Results:
Training Accuracy: 0.9247
Test Accuracy: 0.9175


In [45]:
# hyperparameter tuning - 1
for C_value in np.arange(0, 2.1, 0.1):
    print(f"\nTesting C = {C_value}")
    smo = SimplifiedSMO(C=C_value, tol=0.001, max_passes=100)
    smo.fit(X_train, y_train)

    train_acc = smo.score(X_train, y_train)
    test_acc = smo.score(X_test, y_test)

    print(f"  Train Accuracy: {train_acc:.4f}")
    print(f"  Test Accuracy: {test_acc:.4f}")
    print(f"  Support Vectors: {len(smo.support_vectors)}")


Testing C = 0.0
Found 0 support vectors
  Train Accuracy: 0.0000
  Test Accuracy: 0.0000
  Support Vectors: 0

Testing C = 0.1
Found 1009 support vectors
  Train Accuracy: 0.9283
  Test Accuracy: 0.9229
  Support Vectors: 1009

Testing C = 0.2
Found 1028 support vectors
  Train Accuracy: 0.9283
  Test Accuracy: 0.9207
  Support Vectors: 1028

Testing C = 0.30000000000000004
Found 1032 support vectors
  Train Accuracy: 0.9302
  Test Accuracy: 0.9251
  Support Vectors: 1032

Testing C = 0.4
Found 1064 support vectors
  Train Accuracy: 0.9304
  Test Accuracy: 0.9175
  Support Vectors: 1064

Testing C = 0.5
Found 1094 support vectors
  Train Accuracy: 0.9269
  Test Accuracy: 0.9164
  Support Vectors: 1094

Testing C = 0.6000000000000001
Found 1090 support vectors
  Train Accuracy: 0.9272
  Test Accuracy: 0.9164
  Support Vectors: 1090

Testing C = 0.7000000000000001
Found 1110 support vectors
  Train Accuracy: 0.9247
  Test Accuracy: 0.9175
  Support Vectors: 1110

Testing C = 0.8
Found 1

In [46]:
# hyperparameter tuning - 2
for C_value in np.arange(1.7, 3.1, 0.1):
    print(f"\nTesting C = {C_value}")
    smo = SimplifiedSMO(C=C_value, tol=0.001, max_passes=100)
    smo.fit(X_train, y_train)

    train_acc = smo.score(X_train, y_train)
    test_acc = smo.score(X_test, y_test)

    print(f"  Train Accuracy: {train_acc:.4f}")
    print(f"  Test Accuracy: {test_acc:.4f}")
    print(f"  Support Vectors: {len(smo.support_vectors)}")


Testing C = 1.7
Found 1247 support vectors
  Train Accuracy: 0.9234
  Test Accuracy: 0.9164
  Support Vectors: 1247

Testing C = 1.8
Found 1250 support vectors
  Train Accuracy: 0.8807
  Test Accuracy: 0.8882
  Support Vectors: 1250

Testing C = 1.9000000000000001
Found 1247 support vectors
  Train Accuracy: 0.9253
  Test Accuracy: 0.9066
  Support Vectors: 1247

Testing C = 2.0
Found 1289 support vectors
  Train Accuracy: 0.9296
  Test Accuracy: 0.9229
  Support Vectors: 1289

Testing C = 2.1000000000000005
Found 1268 support vectors
  Train Accuracy: 0.9201
  Test Accuracy: 0.9131
  Support Vectors: 1268

Testing C = 2.2
Found 1287 support vectors
  Train Accuracy: 0.7245
  Test Accuracy: 0.7307
  Support Vectors: 1287

Testing C = 2.3000000000000007
Found 1324 support vectors
  Train Accuracy: 0.9144
  Test Accuracy: 0.8979
  Support Vectors: 1324

Testing C = 2.4000000000000004
Found 1317 support vectors
  Train Accuracy: 0.9299
  Test Accuracy: 0.9197
  Support Vectors: 1317

Tes

In [3]:
best_train = 0
best_test = 0
best_config = {}
results = []

C_values = np.arange(0.25, 0.36, 0.01)
tol_values = np.arange(0.0001, 0.0011, 0.0001)

print("Starting grid search...")
start_time = time.time()

for C in C_values:
    for tol in tol_values:
        smo = SimplifiedSMO(C=C, tol=tol, max_passes=200, seed=42)
        smo.fit(X_train, y_train)

        train_acc = smo.score(X_train, y_train)
        test_acc = smo.score(X_test, y_test)

        results.append({'C': C, 'tol': tol, 'train': train_acc, 'test': test_acc})

        if test_acc > best_test and train_acc >= 0.93:
            best_test = test_acc
            best_train = train_acc
            best_config = {'C': C, 'tol': tol}
            print(f"New best: C={C:.2f}, tol={tol}, Train={train_acc:.4f}, Test={test_acc:.4f}")

print(f"\nGrid search completed in {time.time() - start_time:.1f} seconds")
print(f"Best config: {best_config}")
print(f"Best performance: Train={best_train:.4f}, Test={best_test:.4f}")

Starting grid search...
Found 964 support vectors
New best: C=0.25, tol=0.0001, Train=0.9310, Test=0.9229
Found 957 support vectors
Found 967 support vectors
New best: C=0.25, tol=0.00030000000000000003, Train=0.9304, Test=0.9251
Found 962 support vectors
Found 961 support vectors
Found 957 support vectors
New best: C=0.25, tol=0.0006000000000000001, Train=0.9323, Test=0.9283
Found 962 support vectors
Found 952 support vectors
Found 951 support vectors
New best: C=0.25, tol=0.0009000000000000001, Train=0.9310, Test=0.9294
Found 955 support vectors
Found 957 support vectors
Found 964 support vectors
Found 968 support vectors
Found 962 support vectors
Found 966 support vectors
Found 966 support vectors
Found 979 support vectors
Found 960 support vectors
Found 979 support vectors
Found 964 support vectors
Found 952 support vectors
Found 952 support vectors
Found 963 support vectors
Found 947 support vectors
Found 977 support vectors
Found 970 support vectors
Found 974 support vectors
Foun

In [7]:
final_smo = SimplifiedSMO(C=0.29, tol=0.0003, max_passes=200, seed=42)
final_smo.fit(X_train, y_train)
train_acc = final_smo.score(X_train, y_train)
test_acc = final_smo.score(X_test, y_test)
print(f"  Train Accuracy: {train_acc:.4f}")
print(f"  Test Accuracy: {test_acc:.4f}")

Found 976 support vectors
  Train Accuracy: 0.9313
  Test Accuracy: 0.9251
