In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt

# Load the feature matrix and the label file.
X = pd.read_csv('X_with_cluster_distances.csv')
Y = pd.read_csv('Y.csv')

# Stratified 80/20 hold-out split; the fixed seed keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, stratify=Y, test_size=0.2, random_state=42
)

# Persist every partition so the split can be reused outside this notebook.
split_files = {
    'X_train.csv': X_train,
    'X_test.csv': X_test,
    'y_train.csv': y_train,
    'y_test.csv': y_test,
}
for csv_name, frame in split_files.items():
    frame.to_csv(csv_name, index=False)

for label, frame in (
    ("Training features shape:", X_train),
    ("Testing features shape:", X_test),
    ("Training labels shape:", y_train),
    ("Testing labels shape:", y_test),
):
    print(label, frame.shape)

# Reload the partitions from disk; labels are flattened to 1-D arrays.
X_train = pd.read_csv('X_train.csv')
X_test = pd.read_csv('X_test.csv')
y_train = pd.read_csv('y_train.csv').values.ravel()
y_test = pd.read_csv('y_test.csv').values.ravel()

# Standardize using statistics learned from the training partition only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Keep a stratified half of the scaled training rows for model fitting.
X_train_small, _, y_train_small, _ = train_test_split(
    X_train_scaled, y_train, stratify=y_train, train_size=0.5, random_state=42
)

# Accumulates one metrics record per (transformation, C) pair across cells.
final_results = []

In [None]:
# No Transform (Linear SVM): sweep C for a linear-kernel SVC fitted on the
# scaled training subsample and scored on the scaled test set.
C_values = [0.01, 0.1, 1, 5, 10, 100]
train_acc_list, test_acc_list = [], []
precision_list, recall_list = [], []

# Keyword arguments shared by the weighted precision and recall scores.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print("\n=== Linear SVM (No Transform) ===")

for C in C_values:
    model = SVC(kernel='linear', C=C, random_state=42).fit(X_train_small, y_train_small)

    y_train_pred = model.predict(X_train_small)
    y_test_pred = model.predict(X_test_scaled)

    metrics = {
        'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
        'Test Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, **weighted_kwargs),
        'Recall': recall_score(y_test, y_test_pred, **weighted_kwargs),
    }
    final_results.append({'Transformation': 'No Transform', 'C': C, **metrics})

    train_acc_list.append(metrics['Train Accuracy'])
    test_acc_list.append(metrics['Test Accuracy'])
    precision_list.append(metrics['Precision'])
    recall_list.append(metrics['Recall'])

# Accuracy curves against C on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
ax.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
ax.set_xscale('log')
ax.set_xlabel('C (log scale)')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy: No Transform')
ax.legend()
ax.grid(True)
plt.show()

# Per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame(
    {'C': C_values, 'Precision': precision_list, 'Recall': recall_list}
)
print("\nPrecision and Recall Table (No Transform):")
display(prec_recall_table)

In [None]:
# Polynomial kernel (degree 2): sweep C for an SVC fitted on the scaled
# training subsample and scored on the scaled test set.
C_values = [0.01, 0.1, 1, 5, 10, 100]
train_acc_list, test_acc_list = [], []
precision_list, recall_list = [], []

# Keyword arguments shared by the weighted precision and recall scores.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print("\n=== SVM with Polynomial Kernel (Degree 2) ===")

for C in C_values:
    model = SVC(
        kernel='poly', degree=2, coef0=1, gamma='scale', C=C, random_state=42
    ).fit(X_train_small, y_train_small)

    y_train_pred = model.predict(X_train_small)
    y_test_pred = model.predict(X_test_scaled)

    metrics = {
        'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
        'Test Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, **weighted_kwargs),
        'Recall': recall_score(y_test, y_test_pred, **weighted_kwargs),
    }
    final_results.append({'Transformation': 'Poly Kernel Degree 2', 'C': C, **metrics})

    train_acc_list.append(metrics['Train Accuracy'])
    test_acc_list.append(metrics['Test Accuracy'])
    precision_list.append(metrics['Precision'])
    recall_list.append(metrics['Recall'])

# Accuracy curves against C on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
ax.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
ax.set_xscale('log')
ax.set_xlabel('C (log scale)')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy: Polynomial Kernel Degree 2')
ax.legend()
ax.grid(True)
plt.show()

# Per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame(
    {'C': C_values, 'Precision': precision_list, 'Recall': recall_list}
)
print("\nPrecision and Recall Table (Polynomial Kernel Degree 2):")
display(prec_recall_table)

In [None]:
# Polynomial kernel (degree 3): sweep C for an SVC fitted on the scaled
# training subsample and scored on the scaled test set.
C_values = [0.01, 0.1, 1, 5, 10, 100]
train_acc_list, test_acc_list = [], []
precision_list, recall_list = [], []

# Keyword arguments shared by the weighted precision and recall scores.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print("\n=== SVM with Polynomial Kernel (Degree 3) ===")

for C in C_values:
    model = SVC(
        kernel='poly', degree=3, coef0=1, gamma='scale', C=C, random_state=42
    ).fit(X_train_small, y_train_small)

    y_train_pred = model.predict(X_train_small)
    y_test_pred = model.predict(X_test_scaled)

    metrics = {
        'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
        'Test Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, **weighted_kwargs),
        'Recall': recall_score(y_test, y_test_pred, **weighted_kwargs),
    }
    final_results.append({'Transformation': 'Poly Kernel Degree 3', 'C': C, **metrics})

    train_acc_list.append(metrics['Train Accuracy'])
    test_acc_list.append(metrics['Test Accuracy'])
    precision_list.append(metrics['Precision'])
    recall_list.append(metrics['Recall'])

# Accuracy curves against C on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
ax.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
ax.set_xscale('log')
ax.set_xlabel('C (log scale)')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy: Polynomial Degree 3')
ax.legend()
ax.grid(True)
plt.show()

# Per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame(
    {'C': C_values, 'Precision': precision_list, 'Recall': recall_list}
)
print("\nPrecision and Recall Table (Polynomial Degree 3):")
display(prec_recall_table)

In [None]:
# Polynomial kernel (degree 4): sweep C for an SVC fitted on the scaled
# training subsample and scored on the scaled test set.
C_values = [0.01, 0.1, 1, 5, 10, 100]
train_acc_list, test_acc_list = [], []
precision_list, recall_list = [], []

# Keyword arguments shared by the weighted precision and recall scores.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print("\n=== SVM with Polynomial Kernel (Degree 4) ===")

for C in C_values:
    model = SVC(
        kernel='poly', degree=4, coef0=1, gamma='scale', C=C, random_state=42
    ).fit(X_train_small, y_train_small)

    y_train_pred = model.predict(X_train_small)
    y_test_pred = model.predict(X_test_scaled)

    metrics = {
        'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
        'Test Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, **weighted_kwargs),
        'Recall': recall_score(y_test, y_test_pred, **weighted_kwargs),
    }
    final_results.append({'Transformation': 'Poly Kernel Degree 4', 'C': C, **metrics})

    train_acc_list.append(metrics['Train Accuracy'])
    test_acc_list.append(metrics['Test Accuracy'])
    precision_list.append(metrics['Precision'])
    recall_list.append(metrics['Recall'])

# Accuracy curves against C on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
ax.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
ax.set_xscale('log')
ax.set_xlabel('C (log scale)')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy: Polynomial Degree 4')
ax.legend()
ax.grid(True)
plt.show()

# Per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame(
    {'C': C_values, 'Precision': precision_list, 'Recall': recall_list}
)
print("\nPrecision and Recall Table (Polynomial Degree 4):")
display(prec_recall_table)

In [None]:
# Log Transform
# Shift the scaled features by the global minimum of the training subsample
# (plus a small epsilon) so the training matrix is positive before the log.
log_eps = 1e-5
train_min = np.min(X_train_small)
X_train_log = np.log(X_train_small + log_eps - train_min)

# Held-out values may fall below the training minimum. Without a floor the
# shifted value is <= 0, np.log returns NaN/-inf, and SVC.predict rejects the
# non-finite input. Flooring at the same epsilon keeps every test value finite
# and leaves in-range values unchanged.
X_test_log = np.log(np.maximum(X_test_scaled + log_eps - train_min, log_eps))

train_acc_list = []
test_acc_list = []
precision_list = []
recall_list = []
C_values = [0.01, 0.1, 1, 10, 100, 500]
print("\n=== Log Transform ===")

# Sweep C for a linear-kernel SVC on the log-transformed features.
for C in C_values:
    model = SVC(C=C, kernel='linear', random_state=42)
    model.fit(X_train_log, y_train_small)

    y_train_pred = model.predict(X_train_log)
    y_test_pred = model.predict(X_test_log)

    train_acc = accuracy_score(y_train_small, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)
    # Weighted averaging; zero_division=0 scores classes with no predictions as 0.
    precision = precision_score(y_test, y_test_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_test_pred, average='weighted', zero_division=0)

    # Record for the notebook-wide summary table.
    final_results.append({
        'Transformation': 'Log Transform',
        'C': C,
        'Train Accuracy': train_acc,
        'Test Accuracy': test_acc,
        'Precision': precision,
        'Recall': recall
    })

    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    precision_list.append(precision)
    recall_list.append(recall)

# Plot: accuracy curves against C on a logarithmic x-axis.
plt.figure(figsize=(8,5))
plt.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
plt.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
plt.xscale('log')
plt.xlabel('C (log scale)')
plt.ylabel('Accuracy')
plt.title('Training vs Testing Accuracy: Log Transform')
plt.legend()
plt.grid(True)
plt.show()

# Table: per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame({'C': C_values, 'Precision': precision_list, 'Recall': recall_list})
print("\nPrecision and Recall Table (Log Transform):")
display(prec_recall_table)

In [None]:
# RBF Kernel: sweep C for an RBF-kernel SVC fitted on the scaled training
# subsample and scored on the scaled test set.
C_values = [0.01, 0.1, 1, 10, 100, 500]
train_acc_list, test_acc_list = [], []
precision_list, recall_list = [], []

# Keyword arguments shared by the weighted precision and recall scores.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print("\n=== RBF Kernel ===")

for C in C_values:
    model = SVC(kernel='rbf', gamma='scale', C=C, random_state=42).fit(
        X_train_small, y_train_small
    )

    y_train_pred = model.predict(X_train_small)
    y_test_pred = model.predict(X_test_scaled)

    metrics = {
        'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
        'Test Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, **weighted_kwargs),
        'Recall': recall_score(y_test, y_test_pred, **weighted_kwargs),
    }
    final_results.append({'Transformation': 'RBF Kernel', 'C': C, **metrics})

    train_acc_list.append(metrics['Train Accuracy'])
    test_acc_list.append(metrics['Test Accuracy'])
    precision_list.append(metrics['Precision'])
    recall_list.append(metrics['Recall'])

# Accuracy curves against C on a logarithmic x-axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(C_values, train_acc_list, marker='o', label='Training Accuracy')
ax.plot(C_values, test_acc_list, marker='o', label='Testing Accuracy')
ax.set_xscale('log')
ax.set_xlabel('C (log scale)')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Testing Accuracy: RBF Kernel')
ax.legend()
ax.grid(True)
plt.show()

# Per-C precision/recall summary for this configuration.
prec_recall_table = pd.DataFrame(
    {'C': C_values, 'Precision': precision_list, 'Recall': recall_list}
)
print("\nPrecision and Recall Table (RBF Kernel):")
display(prec_recall_table)

In [None]:
from IPython.display import display

# Gather every record appended to final_results into one summary table,
# render it in the notebook, then persist it as CSV without the row index.
final_results_df = pd.DataFrame(data=final_results)
display(final_results_df)
final_results_df.to_csv(path_or_buf='svm_final_metrics_results.csv', index=False)