In [16]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import pandas as pd
from sklearn.model_selection import cross_validate, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, \
    VotingClassifier, StackingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector as selector
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

#Load the dataset
df = pd.read_csv('german_credit_data.csv')

# Drop unnecessary columns
df.drop(['Sex', 'Job', 'Credit amount'], axis=1, inplace=True)

# Convert categorical variables to numerical form using label encoding
le = LabelEncoder()
#df['Sex'] = le.fit_transform(df['Sex'])
df['Housing'] = le.fit_transform(df['Housing'])
df['Risk'] = le.fit_transform(df['Risk'])
df['Saving accounts'] = le.fit_transform(df['Saving accounts'])
df['Checking account'] = le.fit_transform(df['Checking account'])
df['Purpose'] = le.fit_transform(df['Purpose'])

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Define the classifiers
ada_boost = AdaBoostClassifier(n_estimators=10,
                                        random_state=42)

gradient_boost = GradientBoostingClassifier(n_estimators=10,
                                        random_state=42)

xg_boost = xgb.XGBClassifier(n_estimators=10,
                                        random_state=42)


cat_boost = CatBoostClassifier(n_estimators=10,
                                        random_state=42)


# Define the cross-validation
cv = KFold(n_splits=10, random_state=42, shuffle=True)

# Define the scoring metrics
scoring = ['accuracy', 'f1', 'precision', 'recall']

# Perform cross-validation and print the results
for clf, name in [(ada_boost, 'Ada Boost'),
                  (gradient_boost, 'Gradient Boost'),
                 (xg_boost, 'Xg Boost'),
                  (cat_boost, 'Cat Boost')]:

    results = cross_validate(clf, X, y, cv=cv, scoring=scoring)

    accuracy = results['test_accuracy'].mean()
    f1 = results['test_f1'].mean()
    precision = results['test_precision'].mean()
    recall = results['test_recall'].mean()

    print(f'{name}:')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print('-' * 50)

Ada Boost:
Accuracy: 0.7140
F1 Score: 0.8046
Precision: 0.7652
Recall: 0.8502
--------------------------------------------------
Gradient Boost:
Accuracy: 0.7190
F1 Score: 0.8273
Precision: 0.7248
Recall: 0.9661
--------------------------------------------------
Xg Boost:
Accuracy: 0.7130
F1 Score: 0.8047
Precision: 0.7636
Recall: 0.8519
--------------------------------------------------
Learning rate set to 0.5
0:	learn: 0.6107359	total: 1.05ms	remaining: 9.43ms
1:	learn: 0.5660688	total: 1.94ms	remaining: 7.76ms
2:	learn: 0.5360093	total: 2.86ms	remaining: 6.67ms
3:	learn: 0.5174244	total: 3.65ms	remaining: 5.47ms
4:	learn: 0.5009346	total: 4.6ms	remaining: 4.6ms
5:	learn: 0.4925795	total: 5.5ms	remaining: 3.67ms
6:	learn: 0.4839312	total: 6.34ms	remaining: 2.71ms
7:	learn: 0.4720621	total: 7.23ms	remaining: 1.81ms
8:	learn: 0.4640463	total: 8.04ms	remaining: 892us
9:	learn: 0.4576858	total: 8.77ms	remaining: 0us
Learning rate set to 0.5
0:	learn: 0.6003526	total: 1.33ms	remaining: 1