In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime, time
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import KFold, RepeatedStratifiedKFold, GridSearchCV, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.utils.testing import all_estimators
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold

## Load dataset

In [None]:
# Targets and feature matrices are stored as separate CSV files, one per split.
y_train, y_test = pd.read_csv("..."), pd.read_csv("...")
x_train_lr, x_test_lr = pd.read_csv("..."), pd.read_csv("...")

In [None]:
# Recursive feature elimination with cross-validation (RFECV): a random forest
# is refit while features are removed one at a time (step=1), and the subset
# size with the best cross-validated ROC AUC is kept.
RFE_SEED = 0  # fixed seed so the selected feature subset is reproducible across runs
rfc = RandomForestClassifier(random_state=RFE_SEED)

min_features_to_select = 1  # Minimum number of features to consider
rfecv = RFECV(estimator=rfc, step=1, cv=StratifiedKFold(5),
              scoring='roc_auc',
              min_features_to_select=min_features_to_select)
rfecv.fit(x_train_lr, y_train)

print("Optimal number of features : %d" % rfecv.n_features_)

# Summarize every input column: whether it was kept and its elimination rank
# (selected features have rank 1).
for i, (selected, rank) in enumerate(zip(rfecv.support_, rfecv.ranking_)):
    print('Column: %d, Selected %s, Rank: %.3f' % (i, selected, rank))

In [None]:
# Reduce both splits to the columns chosen by the fitted RFECV selector.
x_train_rfe = rfecv.transform(x_train_lr)
x_test_rfe = rfecv.transform(x_test_lr)

# Names of the retained features: index the original column labels with the
# selector's support mask.
feature_names = np.array(x_train_lr.columns.tolist())[rfecv.support_]
feature_names

## SVM classifier

In [None]:
# Hyperparameter grid: one sub-grid per kernel; gamma is searched only for the
# rbf and sigmoid kernels.
parameters=[
    {"C":[1,10,100,1000],"kernel":["linear"]},
    {"C":[1,10,100,1000],"kernel":["rbf"],"gamma":[0.01,0.001,0.0001]},
    {"C":[1,10,100,1000],"kernel":["sigmoid"],"gamma":[0.01,0.001,0.0001]}
]

# Shuffled 5-fold CV on the training split; candidates are ranked by ROC AUC.
kfold_cv=KFold(n_splits=5,shuffle=True,random_state=10)
clf = GridSearchCV(SVC(random_state=20),parameters,verbose=3,cv=kfold_cv,scoring='roc_auc',n_jobs=-1)
clf.fit(x_train_rfe,y_train)
best=clf.best_estimator_  # refit on the full training split with the best parameters
print("Optimal hyperparameters=",best)


# ROC AUC is a ranking metric and needs continuous scores. Feeding it the hard
# labels from predict() collapses the curve to a single operating point and
# does not match the 'roc_auc' scorer used for model selection above, so the
# AUC is computed from decision_function() and accuracy from predict().
# (Binary target, as the 'roc_auc' scorer already requires.)
y_score_train = best.decision_function(x_train_rfe)
y_pred_train = best.predict(x_train_rfe)
print('Final AUC of train set=',roc_auc_score(y_train,y_score_train))
print('Final accuracy of train set=',accuracy_score(y_train,y_pred_train))


y_score = best.decision_function(x_test_rfe)
y_pred = best.predict(x_test_rfe)
print('Final AUC of test set=',roc_auc_score(y_test,y_score))
print('Final accuracy of test set=',accuracy_score(y_test,y_pred))