# SVM

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVR,SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score,f1_score,accuracy_score,precision_score,recall_score

In [2]:
import warnings
# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
data=pd.read_csv('SVM.csv')
data.head()

Unnamed: 0,clientid,income,age,loan,Credit_risk_score
0,1,66155.9251,59.017015,8106.532131,0
1,2,34415.15397,48.117153,6564.745018,0
2,3,57317.17006,63.108049,8020.953296,0
3,4,42709.5342,45.751972,6103.64226,0
4,5,66952.68885,18.584336,8770.099235,1


In [4]:
data.isna().sum()

clientid             0
income               0
age                  3
loan                 0
Credit_risk_score    0
dtype: int64

In [5]:
data=data.fillna(method='bfill')

In [6]:
data.isna().sum()

clientid             0
income               0
age                  0
loan                 0
Credit_risk_score    0
dtype: int64

In [7]:
x=data.iloc[:,:-1]
y=data.iloc[:,-1:]
print('X shape:\t',x.shape)
print()
print('Y shape:\t',y.shape)

X shape:	 (2000, 4)

Y shape:	 (2000, 1)


In [8]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=.1,random_state=2)
print('****************Training Data Shape********************\n')
print('Training data-X- Shape:\t',xtrain.shape)
print()
print('Training data-y- Shape:\t',ytrain.shape)
print()
print('****************Testing Data Shape********************\n')
print('Testing data(x-Input) Shape:\t',xtest.shape)
print()
print('Testing data(y-Outcome) Shape:\t',ytest.shape)


****************Training Data Shape********************

Training data-X- Shape:	 (1800, 4)

Training data-y- Shape:	 (1800, 1)

****************Testing Data Shape********************

Testing data(x-Input) Shape:	 (200, 4)

Testing data(y-Outcome) Shape:	 (200, 1)


In [9]:
#SVC with linear kernal 
ytrain = ytrain.values.ravel()
ytest = ytest.values.ravel()
lin_svc=SVC(kernel='linear')
print('='*100)
print('Hyperparameter:\n',lin_svc.get_params())
print('='*100)
#Train phase
lin_svc.fit(xtrain,ytrain)
#Testing phase
ypred=lin_svc.predict(xtest)
print('Predicted price:\n',ypred)
print('='*100)
#Performance measures
print('Mean Squared Error:\t',mean_squared_error(ytest,ypred))
print()
print('RMSE:\t',np.sqrt(mean_squared_error(ytest,ypred)))
print()
print('R-Square value:\t',r2_score(ytest,ypred))
print()
print('F1 Score value:\t',f1_score(ytest,ypred))
print()
print('Recall Score value:\t',recall_score(ytest,ypred))
print()
print('Precision Score value:\t',precision_score(ytest,ypred))
support_vectors_lin = lin_svc.support_vectors_


Hyperparameter:
 {'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Predicted price:
 [1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0
 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 1
 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 1 0
 0 0 0 0 0 1 1 1 0 0 0 1 0 1 1]
Mean Squared Error:	 0.065

RMSE:	 0.25495097567963926

R-Square value:	 0.6081977094635322

F1 Score value:	 0.8505747126436781

Recall Score value:	 0.8809523809523809

Precision Score value:	 0.8222222222222222


In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler

In [11]:
# Hyperparameter Tuning
# For Linear SVM
param_grid_linear = {'C': [0.001, 0.01, 0.1, 1, 10]}
grid_linear = GridSearchCV(SVC(kernel='linear'), param_grid=param_grid_linear, cv=5)
grid_linear.fit(xtrain, ytrain)
best_linear_svc = grid_linear.best_estimator_

In [12]:
# Support Vectors
n_support_vectors_linear = len(best_linear_svc.support_)
# Model Comparison
linear_svc_accuracy = best_linear_svc.score(xtest, ytest)

print("\n\nSupport Vectors in Linear SVM:", n_support_vectors_linear)
print()
print("\nAccuracy of Linear SVM:", linear_svc_accuracy)




Support Vectors in Linear SVM: 201


Accuracy of Linear SVM: 0.925


In [13]:
#SVC with non linear kernal 
non_lin_svc=SVC(kernel='rbf')
print('='*100)
print('Hyperparameter:\n',non_lin_svc.get_params())
print('='*100)
#Train phase
non_lin_svc.fit(xtrain,ytrain)
#Testing phase
ypred=non_lin_svc.predict(xtest)
print('Predicted price:\n',ypred)
print('='*100)
#Performance measures
print('Mean Squared Error:\t',mean_squared_error(ytest,ypred))
print()
print('RMSE:\t',np.sqrt(mean_squared_error(ytest,ypred)))
print()
print('R-Square value:\t',r2_score(ytest,ypred))
print()
print('F1 Score value:\t',f1_score(ytest,ypred))
print()
print('Recall Score value:\t',recall_score(ytest,ypred))
print()
print('Precision Score value:\t',precision_score(ytest,ypred,zero_division=0))
support_vectors_non_lin = non_lin_svc.support_vectors_


Hyperparameter:
 {'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Predicted price:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Mean Squared Error:	 0.21

RMSE:	 0.458257569495584

R-Square value:	 -0.26582278481012667

F1 Score value:	 0.0

Recall Score value:	 0.0

Precision Score value:	 0.0


In [14]:
# Hyperparameter Tuning
# For Non-linear SVM (RBF kernel)
param_grid_rbf = {'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1]}
grid_rbf = GridSearchCV(SVC(kernel='rbf'), param_grid=param_grid_rbf, cv=5)
grid_rbf.fit(xtrain, ytrain)
best_rbf_svc = grid_rbf.best_estimator_

In [15]:
# Support Vectors
n_support_vectors_rbf = len(best_rbf_svc.support_)

# Model Comparison
rbf_svc_accuracy = best_rbf_svc.score(xtest, ytest)

print("\n\nSupport Vectors in RBF SVM:", n_support_vectors_rbf)
print()
print("\nAccuracy of RBF SVM:", rbf_svc_accuracy)



Support Vectors in RBF SVM: 722


Accuracy of RBF SVM: 0.79


In [None]:
# Decision Boundary Visualization for a 2D plot (assuming you have two features)
def plot_decision_boundary(clf, X, y, title):
    clf.fit(X, y)
    h = 2  # Increase the step size
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)

plt.figure(figsize=(12, 5))
plt.subplot(121)
xtrain_np = xtrain.values  # Convert xtrain to a NumPy array
plot_decision_boundary(best_linear_svc, xtrain_np[:, :2], ytrain, "Linear SVM Decision Boundary")
plt.subplot(122)
plot_decision_boundary(best_rbf_svc, xtrain_np[:, :2], ytrain, "RBF SVM Decision Boundary")
plt.show()


In [16]:
#SVR with Non linear kernal - rbf
support_reg=SVR()
print('='*100)
print('Hyperparameter:\n',support_reg.get_params())
print('='*100)
#Train phase
support_reg.fit(xtrain,ytrain)
#Testing phase
ypred=support_reg.predict(xtest)
print('Predicted price:\n',ypred)
print('='*100)
#Performance measures
print('Mean Squared Error:\t',mean_squared_error(ytest,ypred))
print()
print('RMSE:\t',np.sqrt(mean_squared_error(ytest,ypred)))
print()
print('R-Square value:\t',r2_score(ytest,ypred))

Hyperparameter:
 {'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Predicted price:
 [0.10042254 0.10008862 0.10023459 0.10002392 0.09989947 0.09983988
 0.10025629 0.09986388 0.09999985 0.10000313 0.1002814  0.0993424
 0.09953842 0.09950034 0.09965678 0.09963458 0.09970319 0.09947028
 0.10000169 0.10020762 0.10026903 0.09995559 0.09996819 0.0997577
 0.09968637 0.09965524 0.09958266 0.09960992 0.099721   0.09985144
 0.09979601 0.09969317 0.10014806 0.0995974  0.09962899 0.09946822
 0.10002797 0.09965253 0.09984964 0.09980977 0.1000819  0.09995255
 0.10013379 0.10023827 0.09988894 0.09947107 0.0998148  0.09962866
 0.09983026 0.09979999 0.09990035 0.09958882 0.09937894 0.09990013
 0.09926755 0.10025442 0.10037225 0.09991549 0.09968162 0.09988731
 0.10006816 0.09973524 0.10007506 0.09983573 0.10003958 0.10007364
 0.10013147 0.10033241 0.10010704 0.09962381 0.09970434

# SVR with Non linear kernal - linear

In [17]:
#SVR with Non linear kernal - linear
support_reg=SVR(kernel='linear')
print('='*100)
print('Hyperparameter:\n',support_reg.get_params())
print('='*100)
#Train phase
support_reg.fit(xtrain,ytrain)
#Testing phase
ypred=support_reg.predict(xtest)
print('Predicted price:\n',ypred)
print('='*100)
#Performance measures
print('Mean Squared Error:\t',mean_squared_error(ytest,ypred))
print()
print('RMSE:\t',np.sqrt(mean_squared_error(ytest,ypred)))
print()
print('R-Square value:\t',r2_score(ytest,ypred))


Hyperparameter:
 {'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Predicted price:
 [-575.36725096 -467.96657376 -473.09642486  307.20193429  107.18898793
 -119.5582314  -260.7762392  -138.13820414 -402.28533797    4.21568341
 -353.94393234  431.7150578   323.65113158  711.72948404   73.70597883
   89.12649913  701.34525581  221.23289247 -279.35602407 -264.39973025
 -216.71538167 -103.12929992   -8.04839711   -4.75655465  271.28206868
  722.97652526  536.30484195  471.91586776  616.80668588  168.97672601
 -280.23625736  -97.79457049   16.5298719   549.34170146  369.43466802
  500.86729093 -513.99680643   71.07591839  -99.2236505   485.36347146
 -413.3245378  -188.10716061 -411.59468703 -399.17864995 -132.3966773
  617.04153152  183.61275859  165.59307356 -188.67982845  174.77146192
   61.14053531  174.85273786  767.33115525 -240.35924773  901.40741439
 -439.4

# SVR with Non linear kernal - sigmoid

In [18]:
#SVR with Non linear kernal - sigmoid
support_reg=SVR(kernel='sigmoid')
print('='*100)
print('Hyperparameter:\n',support_reg.get_params())
print('='*100)
#Train phase
support_reg.fit(xtrain,ytrain)
#Testing phase
ypred=support_reg.predict(xtest)
print('Predicted price:\n',ypred)
print('='*100)
#Performance measures
print('Mean Squared Error:\t',mean_squared_error(ytest,ypred))
print()
print('RMSE:\t',np.sqrt(mean_squared_error(ytest,ypred)))
print()
print('R-Square value:\t',r2_score(ytest,ypred))

Hyperparameter:
 {'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Predicted price:
 [ 21.53674091 -30.94207546  30.66873595   2.11192176  28.9120205
 -30.87969021  22.97377986  34.42658938  31.55503283  -2.64263191
  20.65364167  29.85426272 -23.2936724  -62.74731236 -35.07244405
 -56.46440259 -70.77241256  36.71406368  36.92430513  -8.39721128
   2.3834714   36.65828214  34.09649531  37.11889384 -14.30168403
 -67.24843892 -35.37786343 -20.02102685 -66.52183601 -73.47775872
  37.22635046 -53.74022159  28.7243771  -67.6532148  -41.8234665
 -54.05939666  35.65741274  30.15695516 -35.74789938 -75.93354901
  33.90125002  34.34202617  24.12250362  20.11258519  35.01072595
 -33.93527915 -70.36815023  33.8717662   36.03270081  29.23430471
  29.04405303 -10.44461304 -51.11957442 -40.63834787 -28.34297431
 -16.29718621  11.67547009  36.10313529 -10.9225281  -72.28842