In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

 Support Vector Machines (SVM) Implementation<br>
This notebook demonstrates both custom and scikit-learn implementations of SVM.<br>
SVM is a powerful supervised learning algorithm used for classification and regression tasks.<br>
It finds the optimal hyperplane that maximizes the margin between classes.

 1. Import Required Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    roc_curve, auc
)
from sklearn.preprocessing import StandardScaler
from cvxopt import matrix, solvers

Set random seed for reproducibility

In [3]:
np.random.seed(2220)

 2. Data Generation and Preprocessing<br>
Generate synthetic data for binary classification<br>
The data consists of two classes with Gaussian distributions

In [4]:
def generate_data():
    """
    Generate sample data for binary classification.
    
    Returns:
        tuple: (X, y) features and target
    """
    n_samples = 100
    
    # Generate two classes
    X1 = np.random.randn(n_samples//2, 2) + np.array([2, 2])
    X2 = np.random.randn(n_samples//2, 2) + np.array([-2, -2])
    X = np.vstack([X1, X2])
    y = np.hstack([np.ones(n_samples//2), -np.ones(n_samples//2)])
    
    return X, y

 3. Custom SVM Implementation<br>
The custom implementation uses quadratic programming to find the optimal hyperplane<br>
It supports different kernel functions for handling non-linear decision boundaries

In [5]:
def kernel_function(x1, x2, kernel='linear', gamma='scale', degree=3, coef0=0.0):
    """
    Compute kernel function between two points.
    
    Hyperparameters:
    - kernel (str): Type of kernel function ('linear', 'poly', 'rbf', 'sigmoid').
      Different kernels can capture different types of decision boundaries.
    - gamma (float): Kernel coefficient for 'rbf', 'poly', 'sigmoid' kernels.
      Controls the influence of individual training samples.
    - degree (int): Degree of polynomial kernel. Higher degrees can capture more
      complex decision boundaries but may lead to overfitting.
    - coef0 (float): Independent term in kernel function. Used in polynomial and
      sigmoid kernels.
    
    Args:
        x1 (numpy.ndarray): First point
        x2 (numpy.ndarray): Second point
        kernel (str): Type of kernel
        gamma (float): Kernel coefficient
        degree (int): Degree of polynomial kernel
        coef0 (float): Independent term in kernel function
        
    Returns:
        float: Kernel value
    """
    if kernel == 'linear':
        return np.dot(x1, x2)
    elif kernel == 'poly':
        return (gamma * np.dot(x1, x2) + coef0) ** degree
    elif kernel == 'rbf':
        return np.exp(-gamma * np.sum((x1 - x2) ** 2))
    elif kernel == 'sigmoid':
        return np.tanh(gamma * np.dot(x1, x2) + coef0)
    else:
        raise ValueError(f"Unknown kernel: {kernel}")

In [6]:
def compute_kernel_matrix(X, kernel='linear', gamma='scale', degree=3, coef0=0.0):
    """
    Compute kernel matrix for training data.
    
    Args:
        X (numpy.ndarray): Training features
        kernel (str): Type of kernel
        gamma (float): Kernel coefficient
        degree (int): Degree of polynomial kernel
        coef0 (float): Independent term in kernel function
        
    Returns:
        numpy.ndarray: Kernel matrix
    """
    n_samples = X.shape[0]
    K = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            K[i, j] = kernel_function(X[i], X[j], kernel, gamma, degree, coef0)
    return K

In [7]:
def custom_svm_fit(X, y, kernel='linear', C=1.0, gamma='scale', degree=3, coef0=0.0):
    """
    Train an SVM model using quadratic programming.
    
    Hyperparameters:
    - C (float): Regularization parameter. Controls the trade-off between having
      a large margin and ensuring that points lie on the correct side of the margin.
      Higher values of C lead to a smaller margin but better classification of
      training points.
    
    Args:
        X (numpy.ndarray): Training features
        y (numpy.ndarray): Target values (-1 or 1)
        kernel (str): Type of kernel
        C (float): Regularization parameter
        gamma (float): Kernel coefficient
        degree (int): Degree of polynomial kernel
        coef0 (float): Independent term in kernel function
        
    Returns:
        tuple: (alpha, support_vectors, support_vector_labels, b) trained parameters
    """
    n_samples, n_features = X.shape

    # Compute kernel matrix
    K = compute_kernel_matrix(X, kernel, gamma, degree, coef0)

    # Set up quadratic programming problem
    P = matrix(np.outer(y, y) * K)
    q = matrix(-np.ones(n_samples))
    G = matrix(np.vstack((-np.eye(n_samples), np.eye(n_samples))))
    h = matrix(np.hstack((np.zeros(n_samples), C * np.ones(n_samples))))
    A = matrix(y.astype(float), (1, n_samples))
    b = matrix(0.0)

    # Solve quadratic programming problem
    solvers.options['show_progress'] = False
    solution = solvers.qp(P, q, G, h, A, b)
    alpha = np.array(solution['x']).flatten()

    # Find support vectors
    support_vector_indices = alpha > 1e-5
    support_vectors = X[support_vector_indices]
    support_vector_labels = y[support_vector_indices]
    alpha = alpha[support_vector_indices]

    # Compute bias
    b = 0
    for i in range(len(alpha)):
        b += support_vector_labels[i]
        b -= np.sum(alpha * support_vector_labels * K[support_vector_indices][i])
    b /= len(alpha)
    return alpha, support_vectors, support_vector_labels, b

In [8]:
def custom_svm_predict(X, alpha, support_vectors, support_vector_labels, b,
                      kernel='linear', gamma='scale', degree=3, coef0=0.0):
    """
    Make predictions using the trained model.
    
    Args:
        X (numpy.ndarray): Features to predict
        alpha (numpy.ndarray): Lagrange multipliers
        support_vectors (numpy.ndarray): Support vectors
        support_vector_labels (numpy.ndarray): Labels of support vectors
        b (float): Bias term
        kernel (str): Type of kernel
        gamma (float): Kernel coefficient
        degree (int): Degree of polynomial kernel
        coef0 (float): Independent term in kernel function
        
    Returns:
        numpy.ndarray: Predicted classes (-1 or 1)
    """
    y_pred = np.zeros(len(X))
    for i in range(len(X)):
        s = 0
        for a, sv_y, sv in zip(alpha, support_vector_labels, support_vectors):
            s += a * sv_y * kernel_function(X[i], sv, kernel, gamma, degree, coef0)
        y_pred[i] = s + b
    return np.sign(y_pred)

 4. Model Training and Evaluation<br>
Train both custom and scikit-learn implementations<br>
Compare their performance on the test set<br>
Generate and prepare data

In [9]:
X, y = generate_data()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2220
)

Scale features

In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Train custom model

In [11]:
alpha, support_vectors, support_vector_labels, b = custom_svm_fit(
    X_train_scaled, y_train,
    kernel='rbf',    # Use RBF kernel for non-linear decision boundary
    C=1.0,          # Regularization parameter
    gamma='scale'   # Kernel coefficient
)
custom_predictions = custom_svm_predict(
    X_test_scaled, alpha, support_vectors, support_vector_labels, b,
    kernel='rbf',
    gamma='scale'
)

TypeError: bad operand type for unary -: 'str'

Train scikit-learn model

In [None]:
sklearn_model = SVC(
    kernel='rbf',    # Use RBF kernel
    C=1.0,          # Regularization parameter
    gamma='scale'   # Kernel coefficient
)
sklearn_model.fit(X_train_scaled, y_train)
sklearn_predictions = sklearn_model.predict(X_test_scaled)

 5. Model Evaluation<br>
Evaluate model performance using various metrics<br>
Compare custom and scikit-learn implementations<br>
Print evaluation metrics

In [None]:
print("\nCustom Implementation Results:")
print(f"Accuracy: {accuracy_score(y_test, custom_predictions):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, custom_predictions))

In [None]:
print("\nScikit-learn Implementation Results:")
print(f"Accuracy: {accuracy_score(y_test, sklearn_predictions):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, sklearn_predictions))

 6. Visualization<br>
Create visualizations to understand model behavior<br>
Plot decision boundaries and confusion matrices

In [None]:
def plot_decision_boundary(X, y, model, support_vectors=None, title="Decision Boundary"):
    """
    Plot the decision boundary and data points using seaborn.
    
    Args:
        X (numpy.ndarray): Features
        y (numpy.ndarray): Target values
        model: Trained model
        support_vectors (numpy.ndarray): Support vectors (optional)
        title (str): Plot title
    """
    h = 0.02  # Step size
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                        np.arange(y_min, y_max, h))
    
    # Predict for each point in the mesh
    if hasattr(model, 'predict'):
        Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    else:
        Z = custom_svm_predict(
            np.c_[xx.ravel(), yy.ravel()],
            model[0], model[1], model[2], model[3],
            kernel='rbf', gamma='scale'
        )
    Z = Z.reshape(xx.shape)
    
    # Create DataFrame for seaborn
    df = pd.DataFrame({
        'Feature 1': X[:, 0],
        'Feature 2': X[:, 1],
        'Class': y
    })
    
    # Plot
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=df, x='Feature 1', y='Feature 2', hue='Class', alpha=0.8)
    plt.contourf(xx, yy, Z, alpha=0.4)
    if support_vectors is not None:
        plt.scatter(support_vectors[:, 0], support_vectors[:, 1],
                   s=100, linewidth=1, facecolors='none', edgecolors='k',
                   label='Support Vectors')
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
def plot_confusion_matrix(y_true, y_pred, title):
    """
    Plot confusion matrix using seaborn.
    
    Args:
        y_true (numpy.ndarray): True labels
        y_pred (numpy.ndarray): Predicted labels
        title (str): Plot title
    """
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Class -1', 'Class 1'],
                yticklabels=['Class -1', 'Class 1'])
    plt.title(title)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

Create visualizations

In [None]:
plot_decision_boundary(X_test, y_test, 
                      (alpha, support_vectors, support_vector_labels, b),
                      support_vectors=support_vectors,
                      title="Custom Implementation Decision Boundary")
plot_confusion_matrix(y_test, custom_predictions,
                     "Custom Implementation Confusion Matrix")

In [None]:
plot_decision_boundary(X_test, y_test, sklearn_model,
                      title="Scikit-learn Implementation Decision Boundary")
plot_confusion_matrix(y_test, sklearn_predictions,
                     "Scikit-learn Implementation Confusion Matrix") 