# Problem 3a: SVM kernels on SPAMBASE

In [None]:
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Data pre-processing for SPAMBASE.
# Every column except the last is used as a feature; the last column is the target.
data = np.loadtxt("spambase/spambase.data", delimiter=",")
X = data[:, :-1]
y = data[:, -1]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' mean/variance leak into the training features.
# Same test_size/random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that refers to the full scaled matrix; it is now
# scaled with training-set statistics.
X_normalized = scaler.transform(X)

1. SVM with Linear Kernel

In [None]:
print("\n 1. LINEAR KERNEL SVM")
print("="*60)

# Train linear SVM. Only the fit() call is timed so that the reported
# "Training Time" does not also include the predictions made below.
# perf_counter() is a monotonic high-resolution clock intended for measuring
# elapsed intervals (time.time() can jump if the system clock is adjusted).
svm_linear = SVC(kernel='linear', C=1.0, random_state=42)
start_time = time.perf_counter()
svm_linear.fit(X_train, y_train)
linear_time = time.perf_counter() - start_time

# Predictions on both splits.
train_pred_linear = svm_linear.predict(X_train)
test_pred_linear = svm_linear.predict(X_test)

# Accuracy = fraction of predictions that match the labels.
train_acc_linear = np.mean(train_pred_linear == y_train)
test_acc_linear = np.mean(test_pred_linear == y_test)

print(f"Train Accuracy: {train_acc_linear:.4f}")
print(f"Test Accuracy: {test_acc_linear:.4f}")
# n_support_ holds the per-class support-vector counts; sum them for the total.
print(f"Number of Support Vectors: {svm_linear.n_support_.sum()}")
print(f"Training Time: {linear_time:.2f} seconds")

2. SVM with RBF Kernel

In [None]:
print("\n 2. RBF Kernel SVM")
print("="*60)

# Train RBF SVM. Only the fit() call is timed so that the reported
# "Training Time" does not also include the predictions made below.
# perf_counter() is a monotonic high-resolution clock intended for measuring
# elapsed intervals (time.time() can jump if the system clock is adjusted).
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
start_time = time.perf_counter()
svm_rbf.fit(X_train, y_train)
rbf_time = time.perf_counter() - start_time

# Predictions on both splits.
train_pred_rbf = svm_rbf.predict(X_train)
test_pred_rbf = svm_rbf.predict(X_test)

# Accuracy = fraction of predictions that match the labels.
train_acc_rbf = np.mean(train_pred_rbf == y_train)
test_acc_rbf = np.mean(test_pred_rbf == y_test)

print(f"Train Accuracy: {train_acc_rbf:.4f}")
print(f"Test Accuracy: {test_acc_rbf:.4f}")
# n_support_ holds the per-class support-vector counts; sum them for the total.
print(f"Number of Support Vectors: {svm_rbf.n_support_.sum()}")
print(f"Training Time: {rbf_time:.2f} seconds")

In [None]:
print("\n3. POLYNOMIAL KERNEL SVM")
print("="*60)

# Train degree-3 polynomial SVM. Only the fit() call is timed so that the
# reported "Training Time" does not also include the predictions made below.
# perf_counter() is a monotonic high-resolution clock intended for measuring
# elapsed intervals (time.time() can jump if the system clock is adjusted).
svm_poly = SVC(kernel='poly', degree=3, C=10.0, gamma='scale', random_state=42)
start_time = time.perf_counter()
svm_poly.fit(X_train, y_train)
poly_time = time.perf_counter() - start_time

# Predictions on both splits.
train_pred_poly = svm_poly.predict(X_train)
test_pred_poly = svm_poly.predict(X_test)

# Accuracy = fraction of predictions that match the labels.
train_acc_poly = np.mean(train_pred_poly == y_train)
test_acc_poly = np.mean(test_pred_poly == y_test)

print(f"Train Accuracy: {train_acc_poly:.4f}")
print(f"Test Accuracy: {test_acc_poly:.4f}")
# n_support_ holds the per-class support-vector counts; sum them for the total.
print(f"Number of Support Vectors: {svm_poly.n_support_.sum()}")
print(f"Training Time: {poly_time:.2f} seconds")

In [None]:
# Sweep the regularisation strength C for each kernel
def test_hyperparameters():
    """Fit one SVC per (kernel, C) pair and print its accuracy on the test split.

    Reads the module-level X_train, y_train, X_test and y_test.

    Returns:
        dict mapping each kernel name ('linear', 'rbf', 'poly') to a list of
        (C, test_accuracy) tuples, in the order the C values were tried.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("HYPERPARAMETER TUNING")
    print(divider)

    c_grid = [0.1, 1.0, 10.0]

    # Kernel-specific constructor arguments; C and random_state are shared.
    kernel_options = {
        'linear': {},
        'rbf': {'gamma': 'scale'},
        'poly': {'degree': 3, 'gamma': 'scale'},
    }

    results = {}
    for kernel_name, extra_args in kernel_options.items():
        print(f"\nTesting {kernel_name.upper()} kernel with different C values:")
        print("-" * 40)

        scores = []
        for c_value in c_grid:
            model = SVC(kernel=kernel_name, C=c_value, random_state=42, **extra_args)
            model.fit(X_train, y_train)
            accuracy = model.score(X_test, y_test)

            scores.append((c_value, accuracy))
            print(f"  C={c_value:4.1f}: Test Accuracy = {accuracy:.4f}")

        results[kernel_name] = scores

    return results

# Run the sweep and keep the per-kernel results
hp_results = test_hyperparameters()

In [None]:
def print_summary_table():
    """Print a fixed-width comparison table for the three fitted SVMs.

    Reads the module-level accuracies, fitted models and timings produced by
    the linear, RBF and polynomial cells; returns nothing.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("PROBLEM 3A: SPAMBASE SVM RESULTS SUMMARY")
    print(banner)

    # One row per kernel: label, train acc, test acc, support-vector total, time.
    rows = (
        ("Linear", train_acc_linear, test_acc_linear, sum(svm_linear.n_support_), linear_time),
        ("RBF", train_acc_rbf, test_acc_rbf, sum(svm_rbf.n_support_), rbf_time),
        ("Polynomial", train_acc_poly, test_acc_poly, sum(svm_poly.n_support_), poly_time),
    )

    # Column titles paired with the widths used for the data rows below.
    columns = (
        ("Kernel", 12),
        ("Train Acc", 12),
        ("Test Acc", 12),
        ("Support Vectors", 18),
        ("Time (s)", 10),
    )
    print(" ".join(f"{title:<{width}}" for title, width in columns))
    print("-" * 70)

    for label, acc_train, acc_test, n_sv, elapsed in rows:
        print(f"{label:<12} {acc_train:<12.4f} {acc_test:<12.4f} {n_sv:<18} {elapsed:<10.2f}")

def check_expectation(kernel, accuracy):
    """Return whether `accuracy` lies inside the expected range for `kernel`.

    Ranges are inclusive at both ends: 'Linear' -> [0.89, 0.93],
    'RBF' -> [0.93, 0.97]. Any other kernel value is checked against the
    polynomial range [0.85, 0.90].
    """
    # Start from the polynomial range; it doubles as the fallback.
    lower, upper = 0.85, 0.90
    for name, bounds in (('Linear', (0.89, 0.93)), ('RBF', (0.93, 0.97))):
        if kernel == name:
            lower, upper = bounds
            break
    return lower <= accuracy <= upper

# Print the comparison table for the linear, RBF and polynomial SVMs.
print_summary_table()