Implementation of SVM with a sigmoid kernel

In [4]:
import numpy as np

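The sigmoid kernel is defined as: $$K(\mathbf{x}, \mathbf{y}) = \tanh\left(\alpha \, (\mathbf{x} \cdot \mathbf{y}) + c\right)$$ where $\alpha$ (the `alpha_coef` parameter below) scales the dot product and $c$ (the `c_coef` parameter) is a constant offset.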

In [5]:
class SVMSigmoid:
    """Binary soft-margin SVM with a sigmoid kernel, trained by simplified SMO.

    The kernel is ``K(x1, x2) = tanh(alpha_coef * <x1, x2> + c_coef)``.
    Labels are expected to be -1 / +1 (the update rules multiply by ``y``
    directly); this is not validated.

    Training draws the second index of each SMO pair from NumPy's global
    random state (``np.random``), so seed it for reproducible results.

    Parameters
    ----------
    C : float
        Upper bound of the box constraint ``0 <= alpha_k <= C``.
    tol : float
        Tolerance used when checking the KKT conditions.
    max_iter : int
        Number of *consecutive* full passes over the data without any
        multiplier update that must occur before training stops. It is not
        a cap on the total number of passes.
    alpha_coef : float
        Slope (alpha) of the sigmoid kernel.
    c_coef : float
        Offset (c) of the sigmoid kernel.

    Attributes set by ``fit``
    -------------------------
    alpha : multipliers of the support vectors only.
    b : bias term of the decision function.
    X, y : the training data as NumPy arrays.
    support_vectors_idx, support_vectors, support_vector_labels :
        indices, samples and labels of the training points whose
        multiplier is greater than 1e-5.
    """

    def __init__(self, C=1.0, tol=1e-3, max_iter=1000, alpha_coef=0.01, c_coef=0.0):
        self.C = C
        self.tol = tol
        self.max_iter = max_iter
        self.alpha_coef = alpha_coef  # alpha parameter of the sigmoid kernel
        self.c_coef = c_coef          # c parameter of the sigmoid kernel
        self.alpha = None
        self.b = 0
        self.X = None
        self.y = None
        # Populated by fit(); declared here so the attributes always exist.
        self.support_vectors_idx = None
        self.support_vectors = None
        self.support_vector_labels = None

    def kernel(self, x1, x2):
        """Sigmoid kernel: ``tanh(alpha_coef * np.dot(x1, x2) + c_coef)``."""
        return np.tanh(self.alpha_coef * np.dot(x1, x2) + self.c_coef)

    def _gram_matrix(self, A, B):
        """Return the matrix ``G`` with ``G[a, b] = kernel(A[a], B[b])``.

        ``kernel`` is built on ``np.dot``, so passing the row matrix ``A``
        and the transposed row matrix ``B`` evaluates every pair at once.
        """
        return self.kernel(A, B.T)

    def fit(self, X, y):
        """Train the classifier on ``X`` (n_samples, n_features) and ``y``.

        ``X`` and ``y`` may be arrays or nested lists; ``y`` is flattened
        and must hold one -1 / +1 label per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``y`` does not have one entry per
            sample, or if fewer than two samples are given (SMO updates
            multipliers in pairs).
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError("y must contain exactly one label per row of X")
        if n_samples < 2:
            raise ValueError("at least two training samples are required")

        self.X = X
        self.y = y
        self.alpha = np.zeros(n_samples)
        self.b = 0

        # Kernel values never change during training: evaluate them once
        # instead of once per sample per pass. Costs O(n_samples**2) memory.
        gram = self._gram_matrix(X, X)
        all_indices = np.arange(n_samples)

        passes = 0
        while passes < self.max_iter:
            num_changed_alphas = 0
            for i in range(n_samples):
                E_i = np.sum(self.alpha * y * gram[:, i]) + self.b - y[i]

                violates_kkt = ((y[i] * E_i < -self.tol and self.alpha[i] < self.C) or
                                (y[i] * E_i > self.tol and self.alpha[i] > 0))
                if not violates_kkt:
                    continue

                # Second multiplier: uniformly random index different from i.
                j = np.random.choice(np.delete(all_indices, i))
                E_j = np.sum(self.alpha * y * gram[:, j]) + self.b - y[j]

                alpha_i_old, alpha_j_old = self.alpha[i], self.alpha[j]

                # Feasible segment for alpha_j given the box and the
                # linear equality constraint.
                if y[i] != y[j]:
                    L = max(0, alpha_j_old - alpha_i_old)
                    H = min(self.C, self.C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0, alpha_i_old + alpha_j_old - self.C)
                    H = min(self.C, alpha_i_old + alpha_j_old)
                if L == H:
                    continue

                K_ii = gram[i, i]
                K_jj = gram[j, j]
                K_ij = gram[i, j]
                eta = 2 * K_ij - K_ii - K_jj
                # The sigmoid kernel is not guaranteed to be positive
                # semi-definite, so eta can be non-negative; skip the pair.
                if eta >= 0:
                    continue

                alpha_j_new = np.clip(alpha_j_old - (y[j] * (E_i - E_j)) / eta, L, H)

                # Negligible step: skip WITHOUT storing alpha_j_new, so the
                # pair stays consistent and sum(alpha * y) is preserved.
                if abs(alpha_j_new - alpha_j_old) < 1e-5:
                    continue

                alpha_i_new = alpha_i_old + y[i] * y[j] * (alpha_j_old - alpha_j_new)
                self.alpha[i] = alpha_i_new
                self.alpha[j] = alpha_j_new

                delta_i = y[i] * (alpha_i_new - alpha_i_old)
                delta_j = y[j] * (alpha_j_new - alpha_j_old)
                b1 = self.b - E_i - delta_i * K_ii - delta_j * K_ij
                b2 = self.b - E_j - delta_i * K_ij - delta_j * K_jj

                if 0 < alpha_i_new < self.C:
                    self.b = b1
                elif 0 < alpha_j_new < self.C:
                    self.b = b2
                else:
                    self.b = (b1 + b2) / 2

                num_changed_alphas += 1

            # Only consecutive idle passes count towards termination.
            if num_changed_alphas == 0:
                passes += 1
            else:
                passes = 0

        # Keep only the samples that contribute to the decision function.
        self.support_vectors_idx = np.where(self.alpha > 1e-5)[0]
        self.support_vectors = self.X[self.support_vectors_idx]
        self.support_vector_labels = self.y[self.support_vectors_idx]
        self.alpha = self.alpha[self.support_vectors_idx]

    def project(self, X):
        """Return the decision value for every row of ``X``.

        Computes ``sum_k alpha_k * y_k * K(sv_k, x) + b`` over the stored
        support vectors. With no support vectors the result is ``b`` for
        every row.
        """
        X = np.asarray(X)
        kernel_to_sv = self._gram_matrix(X, self.support_vectors)  # (n_rows, n_sv)
        return kernel_to_sv @ (self.alpha * self.support_vector_labels) + self.b

    def predict(self, X):
        """Return the predicted label (+1.0 or -1.0) for every row of ``X``.

        A decision value of exactly 0 is assigned to the positive class so
        that the output never contains the non-label value 0.
        """
        return np.where(self.project(X) >= 0, 1.0, -1.0)

def compute_metrics(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 for -1 / +1 labels.

    The positive class is +1 and the negative class is -1.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Arrays and plain sequences are both
        accepted; they must have the same shape.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero (including accuracy on empty input) is
        reported as 0.0.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce first: with plain lists `y_true == y_pred` is a single bool and
    # `y_true == 1` is always False, which silently corrupts every metric.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    accuracy = np.mean(y_true == y_pred) if y_true.size > 0 else 0.0
    tp = np.sum((y_true == 1) & (y_pred == 1))
    tn = np.sum((y_true == -1) & (y_pred == -1))
    fp = np.sum((y_true == -1) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == -1))

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    return accuracy, precision, recall, f1_score

In [6]:
if __name__ == "__main__":
    # Simple demo: two 2-D Gaussian blobs of 30 points each, centred at
    # (1, 1) for the +1 class and (-1, -1) for the -1 class.
    np.random.seed(42)
    X1 = np.array([1, 1]) + np.random.randn(30, 2)
    X2 = np.array([-1, -1]) + np.random.randn(30, 2)
    X = np.concatenate((X1, X2), axis=0)
    y = np.concatenate((np.ones(30), -np.ones(30)))

    # Train on the full set and evaluate on the same points.
    model = SVMSigmoid(C=1.0, alpha_coef=0.1, c_coef=0.0)
    model.fit(X, y)
    preds = model.predict(X)

    accuracy, precision, recall, f1_score = compute_metrics(y, preds)
    print("Predicciones:", preds)
    print(
        f"Accuracy: {accuracy:.4f}",
        f"Precision: {precision:.4f}",
        f"Recall: {recall:.4f}",
        f"F1 Score: {f1_score:.4f}",
        sep="\n",
    )

Predicciones: [ 1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1. -1.  1. -1. -1.  1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1.
 -1. -1.  1. -1. -1.  1.]
Accuracy: 0.9000
Precision: 0.8529
Recall: 0.9667
F1 Score: 0.9062
