In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 2: Read the training features and labels from disk
X_train = pd.read_csv('X_train.csv')

# Flatten the label frame into a 1-D array in the same statement that loads it
y_train = pd.read_csv('y_train.csv').values.ravel()

# Step 3: Grid of regularization strengths swept by every experiment
C_values = [0.01, 0.1, 0.5, 1, 10, 100]

# Step 4: One list of (C, accuracy) pairs per experiment, in presentation order
results = {
    label: []
    for label in ('No Transform', 'Polynomial Features', 'PCA', 'RBF Kernel')
}

# Step 5: Standardize the features; the fitted `scaler` is kept for reuse
scaler = StandardScaler()
scaler.fit(X_train)
X_scaled = scaler.transform(X_train)

# Step 6: Different Transformations

# --- No Transform ---
# Baseline: linear-kernel SVM on the scaled features. Accuracy is measured on
# the same rows the model was fit on, i.e. it is a training accuracy.
print("\n=== SVM on Original Scaled Data ===")
for C in C_values:
    model = SVC(kernel='linear', C=C, random_state=42).fit(X_scaled, y_train)
    y_pred = model.predict(X_scaled)
    acc = accuracy_score(y_true=y_train, y_pred=y_pred)
    results['No Transform'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")


=== SVM on Original Scaled Data ===
C=0.01 --> Accuracy: 0.2855
C=0.1 --> Accuracy: 0.3121
C=0.5 --> Accuracy: 0.3165
C=1 --> Accuracy: 0.3177
C=10 --> Accuracy: 0.3187
C=100 --> Accuracy: 0.3191


In [2]:
# --- Polynomial Features ---
# Expand the scaled features with degree-2 polynomial terms, then sweep C with
# a linear-kernel SVM. Accuracy is measured on the rows used for fitting.
print("\n=== SVM on Polynomial Features ===")
poly = PolynomialFeatures(degree=2)
poly.fit(X_scaled)
X_poly = poly.transform(X_scaled)

for C in C_values:
    model = SVC(kernel='linear', C=C, random_state=42).fit(X_poly, y_train)
    y_pred = model.predict(X_poly)
    acc = accuracy_score(y_true=y_train, y_pred=y_pred)
    results['Polynomial Features'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")



=== SVM on Polynomial Features ===
C=0.01 --> Accuracy: 0.3312
C=0.1 --> Accuracy: 0.3759
C=0.5 --> Accuracy: 0.3929
C=1 --> Accuracy: 0.3968
C=10 --> Accuracy: 0.4097
C=100 --> Accuracy: 0.4223


In [3]:
# --- PCA Transformation ---
# Project the scaled features onto their leading principal components, then
# sweep C with a linear-kernel SVM. Accuracy is measured on the rows used for
# fitting, matching the other experiments in this notebook.
print("\n=== SVM on PCA-transformed Data ===")
# random_state pins the projection whenever PCA's automatic solver selection
# lands on a randomized SVD, so re-running the notebook reproduces the same
# components (and therefore the same accuracies). 42 matches the seed used for
# the SVC models.
pca = PCA(n_components=10, random_state=42)  # keep top 10 components
X_pca = pca.fit_transform(X_scaled)

for C in C_values:
    model = SVC(C=C, kernel='linear', random_state=42)
    model.fit(X_pca, y_train)
    y_pred = model.predict(X_pca)
    acc = accuracy_score(y_train, y_pred)
    # Each entry pairs the regularization strength with its accuracy
    results['PCA'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")


=== SVM on PCA-transformed Data ===
C=0.01 --> Accuracy: 0.2841
C=0.1 --> Accuracy: 0.3053
C=0.5 --> Accuracy: 0.3113
C=1 --> Accuracy: 0.3127
C=10 --> Accuracy: 0.3134
C=100 --> Accuracy: 0.3132


In [4]:
# --- RBF Kernel (no transformation needed) ---
# The non-linearity comes from the kernel itself, so the model is fit directly
# on the scaled features. Accuracy is measured on the rows used for fitting.
print("\n=== SVM with RBF Kernel ===")
for C in C_values:
    model = SVC(kernel='rbf', gamma='scale', C=C, random_state=42).fit(X_scaled, y_train)
    y_pred = model.predict(X_scaled)
    acc = accuracy_score(y_true=y_train, y_pred=y_pred)
    results['RBF Kernel'].append((C, acc))
    print(f"C={C} --> Accuracy: {acc:.4f}")


=== SVM with RBF Kernel ===
C=0.01 --> Accuracy: 0.2003
C=0.1 --> Accuracy: 0.2913
C=0.5 --> Accuracy: 0.3450
C=1 --> Accuracy: 0.3679
C=10 --> Accuracy: 0.4326
C=100 --> Accuracy: 0.5114
