In [None]:
"""
Write a Python function that implements a deterministic version of the Pegasos algorithm to train a kernel SVM classifier from scratch. The function should take a dataset (as a 2D NumPy array where each row represents a data sample and each column represents a feature), a label vector (1D NumPy array where each entry corresponds to the label of the sample), and training parameters such as the choice of kernel (linear or RBF), regularization parameter (lambda), and the number of iterations. Note that while the original Pegasos algorithm is stochastic (it selects a single random sample at each step), this problem requires using all samples in every iteration (i.e., no random sampling). The function should perform binary classification and return the model's alpha coefficients and bias.

Example:
Input:
data = np.array([[1, 2], [2, 3], [3, 1], [4, 1]]), labels = np.array([1, 1, -1, -1]), kernel = 'rbf', lambda_val = 0.01, iterations = 100, sigma = 1.0
Output:
alpha = [0.03, 0.02, 0.05, 0.01], b = -0.05
Reasoning:
Using the RBF kernel, the Pegasos algorithm iteratively updates the weights based on a sub-gradient descent method, taking into account the non-linear separability of the data induced by the kernel transformation.
"""

In [21]:
import numpy as np
from numpy.linalg import norm

def pegasos_kernel_svm(data: np.ndarray, labels: np.ndarray, kernel='linear', lambda_val=0.01, iterations=100, sigma=1.0) -> (list, float): 
    n_samples, n_features = data.shape
    alphas = np.zeros(n_samples)
    b = 0.0


    def linear_kernel(x1: np.ndarray, x2: np.ndarray) -> float:
        return np.dot(x1, x2)
        
    def rbf_kernel(x1: np.ndarray, x2: np.ndarray) -> float:
        return np.exp(-norm(x1 - x2)**2 / (2 * sigma**2))

    if kernel == 'linear':
        k_func = linear_kernel
    elif kernel == 'rbf':
        k_func = lambda x1, x2: rbf_kernel(x1, x2)
    else:
        raise ValueError("E")
        
    K_matrix = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(i, n_samples):

            val = k_func(data[i], data[j])
            K_matrix[i, j] = val
            K_matrix[j, i] = val 
            

    for t in range(1, iterations + 1):
        
        eta = 1.0 / (lambda_val * t)
        for i in range(n_samples):
            y_i = labels[i]
            f_xi = np.dot(alphas * labels, K_matrix[:, i]) + b
            if y_i * f_xi < 1:
                
                alphas[i] = alphas[i] + eta * (y_i - lambda_val * alphas[i])
                b = b + eta * y_i
                
    return alphas.tolist(), b

In [22]:
print(pegasos_kernel_svm(np.array([[1, 2], [2, 3], [3, 1], [4, 1]]), np.array([1, 1, -1, -1]), kernel='linear', lambda_val=0.01, iterations=100))

([100.0, 0.0, -100.0, -100.0], np.float64(-937.4755035279245))


In [23]:
print(pegasos_kernel_svm(np.array([[1, 2], [2, 3], [3, 1], [4, 1]]), np.array([1, 1, -1, -1]), kernel='rbf', lambda_val=0.01, iterations=100, sigma=0.5))

([100.0, 98.99999999999999, -100.0, -100.0], np.float64(-115.0))
