In [4]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV,StratifiedKFold,cross_val_score
from sklearn.metrics import confusion_matrix ,classification_report,accuracy_score,log_loss,roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
import matplotlib.pyplot as plt
import os 
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB,GaussianNB
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import *
import warnings
warnings.simplefilter('ignore')
os.chdir('D:/Datasets')

In [5]:
kyp =pd.read_csv('Kyphosis.csv')
y = kyp['Kyphosis']
X = kyp.drop('Kyphosis',axis=1)



X_train, X_test, y_train, y_test = train_test_split(X, y,
                                   test_size=0.3,
                                   random_state=24,stratify=y)   # stratify is dividing the dataset into equally like 60 and 40 automatically


In [14]:
# Initialize a Support Vector Classifier (SVC) with a linear kernel and C=3.5
svc = SVC(
    # Kernel type
    kernel='linear',
    # Regularization parameter
    C=3.5
)

# Fit the SVC model to the training data
svc.fit(
    # Training features
    X_train,
    # Training target
    y_train
)

# Use the trained model to make predictions on the test data
y_pred = svc.predict(
    # Test features
    X_test
)

# Calculate the accuracy of the predictions
accuracy = accuracy_score(
    # Actual target values
    y_test,
    # Predicted target values
    y_pred
)

# Print the accuracy
print(accuracy)


0.76


In [15]:
# Initialize a Support Vector Classifier (SVC) with an RBF kernel, C=1, and gamma=3
svc = SVC(
    # Kernel type
    kernel='rbf',
    # Regularization parameter
    C=1,
    # Gamma parameter
    gamma=3
)

# Fit the SVC model to the training data
svc.fit(
    # Training features
    X_train,
    # Training target
    y_train
)

# Use the trained model to make predictions on the test data
y_pred = svc.predict(
    # Test features
    X_test
)

# Calculate the accuracy of the predictions
accuracy = accuracy_score(
    # Actual target values
    y_test,
    # Predicted target values
    y_pred
)

# Print the accuracy
print(accuracy)

0.8


In [17]:
# Import the StratifiedKFold class from sklearn.model_selection
from sklearn.model_selection import StratifiedKFold

# Initialize a Support Vector Classifier (SVC)
svm = SVC()

# Initialize a StandardScaler
std_scaler = StandardScaler()

# Initialize a Min-Max Scaler
min_max_scalar = MinMaxScaler()

# Initialize a K-Nearest Neighbors Classifier
knn = KNeighborsClassifier()

# Create a Pipeline with the KNN Classifier and the Scalers
pipe = Pipeline([('SCL', None), ('KNN', knn)])

# Define the grid of hyperparameters for the KNN Classifier
param = {
    # Number of neighbors for the KNN Classifier
    'KNN__n_neighbors': np.arange(1, 17),
    # Scaler to use
    'SCL': [None, std_scaler, min_max_scalar]
}

# Create a Stratified K-Fold cross-validation object
Kfold = StratifiedKFold(
    # Number of folds for the cross-validation
    n_splits=5,
    # Shuffle the data before splitting it into folds
    shuffle=True,
    # Random seed for shuffling
    random_state=24
)

# Create a GridSearchCV object
gcv = GridSearchCV(
    # Model to be tuned
    pipe,
    # Grid of hyperparameters
    param_grid=param,
    # Cross-validation object
    cv=Kfold,
    # Metric to use for evaluating the model
    scoring='neg_log_loss'
)

# Fit the grid search model to the data
gcv.fit(X, y)

# Print the best combination of hyperparameters found during the grid search
print(gcv.best_params_)

# Print the best score (i.e., the lowest negative log loss) found during the grid search
print(gcv.best_score_)

{'KNN__n_neighbors': 9, 'SCL': MinMaxScaler()}
-0.3541342613432673


# SVC using StratifiedKFold

In [18]:
# Import the StratifiedKFold class from sklearn.model_selection
from sklearn.model_selection import StratifiedKFold

# Initialize a Support Vector Classifier (SVC)
svm = SVC()

# Create a Stratified K-Fold cross-validation object
Kfold = StratifiedKFold(
    # Number of folds for the cross-validation
    n_splits=5,
    # Shuffle the data before splitting it into folds
    shuffle=True,
    # Random seed for shuffling
    random_state=24
)

# Define the grid of hyperparameters for the SVC
param = {
    # Kernel type for the SVC
    'kernel': ['rbf', 'linear'],
    # Regularization parameter (C)
    'C': np.linspace(0.01, 4, 10),
    # Gamma parameter
    'gamma': np.linspace(0.01, 4, 10)
}

# Create a GridSearchCV object
gcv = GridSearchCV(
    # Model to be tuned
    svm,
    # Grid of hyperparameters
    param_grid=param,
    # Cross-validation object
    cv=Kfold
)

# Fit the grid search model to the data
gcv.fit(X, y)

# Print the best combination of hyperparameters found during the grid search
print(gcv.best_params_)

# Print the best score (i.e., the highest accuracy) found during the grid search
print(gcv.best_score_)

{'C': 0.8966666666666667, 'gamma': 0.01, 'kernel': 'rbf'}
0.8147058823529412


# using std_scaler scaling and min_max_scalar

In [20]:
# Import the StratifiedKFold class from sklearn.model_selection
from sklearn.model_selection import StratifiedKFold

# Initialize a Support Vector Classifier (SVC)
svm = SVC()

# Initialize a StandardScaler
std_scaler = StandardScaler()

# Initialize a Min-Max Scaler
min_max_scalar = MinMaxScaler()

# Initialize a K-Nearest Neighbors Classifier
knn = KNeighborsClassifier()

# Create a Pipeline with the SVC and the Scalers
pipe = Pipeline([('SCL', None), ('SVM', svm)])

# Define the grid of hyperparameters for the SVC
param = {
    # Kernel type for the SVC
    'SVM__kernel': ['rbf', 'linear'],
    # Regularization parameter (C) for the SVC
    'SVM__C': np.linspace(0.01, 4, 10),
    # Gamma parameter for the SVC
    'SVM__gamma': np.linspace(0.01, 4, 10),
    # Scaler to use
    "SCL": [None, std_scaler, min_max_scalar]
}

# Create a Stratified K-Fold cross-validation object
Kfold = StratifiedKFold(
    # Number of folds for the cross-validation
    n_splits=5,
    # Shuffle the data before splitting it into folds
    shuffle=True,
    # Random seed for shuffling
    random_state=24
)

# Create a GridSearchCV object
gcv = GridSearchCV(
    # Model to be tuned
    pipe,
    # Grid of hyperparameters
    param_grid=param,
    # Cross-validation object
    cv=Kfold
)

# Fit the grid search model to the data
gcv.fit(X, y)

# Print the best combination of hyperparameters found during the grid search
print(gcv.best_params_)

# Print the best score (i.e., the highest accuracy) found during the grid search
print(gcv.best_score_)

{'SCL': StandardScaler(), 'SVM__C': 1.7833333333333334, 'SVM__gamma': 0.01, 'SVM__kernel': 'linear'}
0.8154411764705882
