In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the pre-split feature tables and label files from the working directory.
X_train = pd.read_csv('X_train.csv')
X_test = pd.read_csv('X_test.csv')
# Labels are flattened to 1-D arrays right after reading
# (presumably one label column per file -- confirm against the CSVs).
y_train = pd.read_csv('y_train.csv').values.ravel()
y_test = pd.read_csv('y_test.csv').values.ravel()

# Standardize: the scaler's statistics come from the training features only,
# and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# One empty list per experiment; later cells append (C, accuracy) tuples.
results = {
    experiment: []
    for experiment in (
        'No Transform',
        'Poly Degree 2',
        'Poly Degree 3',
        'Poly Degree 4',
        'Log Transform',
        'RBF Kernel',
    )
}


In [3]:
# --- No Transform ---
# Baseline: a linear-kernel SVM trained directly on the standardized features,
# once per regularization strength C. Accuracy on the test split is recorded
# as (C, accuracy) tuples under results['No Transform'].
C_values = [0.01, 0.1, 1, 10, 100, 500]
print("\n=== Linear SVM (No Transform) ===")

# Start from an empty list so that re-running this cell replaces the earlier
# scores instead of appending a second copy of every (C, accuracy) pair.
results['No Transform'] = []

for C in C_values:
    model = SVC(C=C, kernel='linear', random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    results['No Transform'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")



=== Linear SVM (No Transform) ===
C=0.01 --> Accuracy: 0.2628
C=0.1 --> Accuracy: 0.3040
C=1 --> Accuracy: 0.2986
C=10 --> Accuracy: 0.3005
C=100 --> Accuracy: 0.2981
C=500 --> Accuracy: 0.2991


In [None]:
# --- Polynomial Features ---
# Explicit polynomial expansion of the standardized features, followed by a
# linear-kernel SVM swept over C for each degree. Accuracy on the test split is
# recorded as (C, accuracy) tuples under results['Poly Degree <degree>'].
C_values = [0.01, 0.1, 1, 10, 100, 500]
for degree in [2, 3, 4]:
    print(f"\n=== Linear SVM (Polynomial Features Degree {degree}) ===")

    # Start this degree's list from scratch so that re-running the cell
    # replaces earlier scores instead of appending duplicates.
    results[f'Poly Degree {degree}'] = []

    # The expansion is fitted on the training features and then applied
    # unchanged to the test features.
    poly = PolynomialFeatures(degree=degree)
    X_train_poly = poly.fit_transform(X_train_scaled)
    X_test_poly = poly.transform(X_test_scaled)

    for C in C_values:
        model = SVC(C=C, kernel='linear', random_state=42)
        model.fit(X_train_poly, y_train)
        y_pred = model.predict(X_test_poly)
        acc = accuracy_score(y_test, y_pred)
        results[f'Poly Degree {degree}'].append((C, acc))
        print(f"C={C} --> Accuracy: {acc:.4f}")


=== Linear SVM (Polynomial Features Degree 2) ===
C=0.01 --> Accuracy: 0.2912
C=0.1 --> Accuracy: 0.3314
C=1 --> Accuracy: 0.3568
C=10 --> Accuracy: 0.3617
C=100 --> Accuracy: 0.3647


In [None]:
C_values = [0.01, 0.1, 1, 10, 100, 500]
# --- 3. Hand-designed Log Transform ---
# Element-wise log of the standardized features, followed by a linear-kernel
# SVM swept over C. Accuracy on the test split is recorded as (C, accuracy)
# tuples under results['Log Transform'].
print("\n=== Linear SVM (Log Transform) ===")

# np.log needs strictly positive inputs. Every value is shifted by the single
# global minimum of the scaled training matrix, plus a small constant, so the
# smallest training value maps to roughly LOG_EPS rather than 0.
LOG_EPS = 1e-5
train_min = np.min(X_train_scaled)  # computed once, reused for both splits
X_train_log = np.log(X_train_scaled + LOG_EPS - train_min)

# The shift comes from the training data only, so a test value below the
# training minimum would still be non-positive after shifting and np.log
# would produce NaN/-inf. Flooring at LOG_EPS keeps every test input inside
# the log's domain; values at or above the training minimum are unaffected
# (up to floating-point rounding at the floor itself).
X_test_log = np.log(np.maximum(X_test_scaled + LOG_EPS - train_min, LOG_EPS))

# Start from an empty list so that re-running this cell replaces the earlier
# scores instead of appending a second copy of every (C, accuracy) pair.
results['Log Transform'] = []

for C in C_values:
    model = SVC(C=C, kernel='linear', random_state=42)
    model.fit(X_train_log, y_train)
    y_pred = model.predict(X_test_log)
    acc = accuracy_score(y_test, y_pred)
    results['Log Transform'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")


=== SVM on PCA-transformed Data ===
C=0.01 --> Accuracy: 0.2673
C=0.1 --> Accuracy: 0.2932
C=1 --> Accuracy: 0.3059
C=10 --> Accuracy: 0.3059
C=100 --> Accuracy: 0.3059


In [None]:
# --- 4. RBF Kernel (no transformation needed) ---
# RBF-kernel SVM trained on the standardized features as-is, swept over C with
# gamma='scale'. Accuracy on the test split is recorded as (C, accuracy)
# tuples under results['RBF Kernel'].
C_values = [0.01, 0.1, 1, 10, 100, 500]
print("\n=== SVM with RBF Kernel ===")

# Start from an empty list so that re-running this cell replaces the earlier
# scores instead of appending a second copy of every (C, accuracy) pair.
results['RBF Kernel'] = []

for C in C_values:
    model = SVC(C=C, kernel='rbf', gamma='scale', random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    results['RBF Kernel'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")


=== SVM with RBF Kernel ===
C=0.01 --> Accuracy: 0.1747
C=0.1 --> Accuracy: 0.2570
C=1 --> Accuracy: 0.3231
C=10 --> Accuracy: 0.3539
C=100 --> Accuracy: 0.3647
C=1000 --> Accuracy: 0.3583
