<a href="https://colab.research.google.com/github/tswiger34/Business-Analytics/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load dataset and libraries

In [None]:
## Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

In [None]:
## Data
# Load the training table and drop every row that contains a missing value.
data = pd.read_csv("/train.csv").dropna()

# One-hot encode the categorical 'EJ' column; all other columns pass through.
X_encoded = pd.get_dummies(data, columns=['EJ'])

# Feature table: everything except the 'Class' target and the 'Id' column.
features = X_encoded.drop(columns=['Class', 'Id'])
y = data["Class"]

# Split BEFORE scaling so the held-out rows never contribute to the scaler's
# mean/variance (fitting the scaler on all rows leaks test-set statistics).
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

## Scaling the data
scaler = StandardScaler()
scaler.fit(features_train)


def _scale(frame):
    """Standardize `frame` with the fitted scaler, keeping its index and column labels."""
    return pd.DataFrame(scaler.transform(frame), index=frame.index, columns=frame.columns)


X_train = _scale(features_train)
X_test = _scale(features_test)

# Full scaled feature table for later cells (e.g. PCA). Kept as a DataFrame so
# that DataFrame-only helpers such as X.info() still work on it.
X = _scale(features)

In [None]:
# X may be a plain NumPy array (StandardScaler.transform output), which has no
# .info(); wrapping it in a DataFrame makes the summary work either way.
pd.DataFrame(X).info()

SVM Model

In [None]:
# Baseline: support vector classifier with a linear kernel, fitted on the
# training split (fit() returns the estimator itself, so it can be chained).
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Predict labels for the held-out split.
y_pred = clf.predict(X_test)

# Fraction of held-out labels predicted correctly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9272727272727272


Finding the best C hyperparameter

In [None]:
# Step 1: Candidate C values: a fine grid from 7.0 to 8.9 in steps of 0.1.
# (The previous list repeated 8.4 and skipped 8.3.)
C_values = [7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9]

# Step 2: Track the best mean cross-validation score seen so far.
best_score = 0
best_C = None

for C in C_values:
    # Step 3: 5-fold cross-validate an RBF SVM on the training split only.
    # 'rbf' is SVC's default kernel; it is spelled out to match the gamma search.
    svm_model = svm.SVC(kernel='rbf', C=C)
    scores = cross_val_score(svm_model, X_train, y_train, cv=5)
    average_score = scores.mean()

    # Step 4: Keep this C only if it strictly beats the current best,
    # so ties resolve to the smallest C tried.
    if average_score > best_score:
        best_score = average_score
        best_C = C

# Step 5: Refit on the whole training split with the selected C.
final_model = svm.SVC(kernel='rbf', C=best_C)
final_model.fit(X_train, y_train)

# Step 6: Evaluate the refitted model on the held-out test split.
y_pred = final_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
print(best_C)

Accuracy: 0.8909090909090909
7.1


Finding the best gamma hyperparameter

In [None]:
# Candidate gamma values for the RBF kernel: 0.01 through 0.09.
gamma_values = [.01, .02, .03, .04, .05, .06, .07, .08, .09]

# Best mean cross-validation accuracy so far and the gamma that produced it.
best_score = 0
best_gamma = None

for candidate_gamma in gamma_values:
    # Mean accuracy over 5 folds of the training split for this gamma.
    candidate_model = svm.SVC(kernel='rbf', gamma=candidate_gamma)
    mean_cv_score = cross_val_score(candidate_model, X_train, y_train, cv=5).mean()

    # Strict comparison: on a tie the earlier (smaller) gamma is kept.
    if mean_cv_score > best_score:
        best_score, best_gamma = mean_cv_score, candidate_gamma

# Refit on the full training split with the winning gamma.
final_model = svm.SVC(kernel='rbf', gamma=best_gamma).fit(X_train, y_train)

# Accuracy of the refitted model on the held-out split.
test_score = final_model.score(X_test, y_test)

print('Accuracy:', test_score)
print('Gamma:', best_gamma)

Accuracy: 0.8727272727272727
Gamma: 0.03


In [None]:
## SVM Model 2
# RBF-kernel SVM with C and gamma fixed at 7.1 and 0.03, the values reported
# by the recorded outputs of the two single-parameter searches.
clf = svm.SVC(kernel='rbf', C=7.1, gamma=0.03)
clf.fit(X_train, y_train)

# Score the model on the held-out split.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8909090909090909


In [None]:
## SVM Model 3

In [None]:
# Polynomial-kernel SVM with every other SVC setting left at its default,
# fitted on the training split.
clf = svm.SVC(kernel='poly').fit(X_train, y_train)

# Held-out predictions and their accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8727272727272727


Repeat the same models after dimensionality reduction (PCA) to shorten training time

In [None]:
## PCA
# Project the scaled feature matrix X onto its principal components.
# Per scikit-learn's PCA documentation, a float n_components in (0, 1) keeps the
# smallest number of components whose cumulative explained variance exceeds
# that fraction (here 95%).
pca = PCA(n_components=.95)
# NOTE(review): fit_transform sees every row of X, including rows that the
# train/test split below assigns to the test set — consider fitting PCA on the
# training rows only.
X_reduced = pca.fit_transform(X)

In [None]:
## Split into test and training sets
# Same 80/20 settings as the first split, applied to the PCA-reduced features.
# This rebinds X_train / X_test / y_train / y_test for every cell that follows.
X_train, X_test, y_train, y_test = train_test_split(
    X_reduced,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
## SVM Model 4
# Linear-kernel SVM trained on the PCA-reduced training split.
clfr1 = svm.SVC(kernel='linear').fit(X_train, y_train)

# Predict the held-out (reduced) rows and measure accuracy.
y_pred = clfr1.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9090909090909091


In [None]:
## SVM Model 5
# RBF-kernel SVM on the PCA-reduced split, reusing C=7.1 and gamma=0.03 from
# the earlier model rather than re-tuning them on the reduced features.
clfr2 = svm.SVC(kernel='rbf', C=7.1, gamma=0.03)
clfr2.fit(X_train, y_train)

# Held-out predictions and their accuracy.
y_pred = clfr2.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8909090909090909


In [None]:
# Polynomial-kernel SVM (other SVC settings at their defaults) trained on the
# PCA-reduced training split.
clfr3 = svm.SVC(kernel='poly').fit(X_train, y_train)

# Score the model on the held-out (reduced) rows.
y_pred = clfr3.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8727272727272727
