In [1]:
# Import required libraries
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from rotation_forest import RotationForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

# Load the dataset
# NOTE(review): if the CSV was saved with its row index, that index column
# (typically read back as 'Unnamed: 0') remains in `df` and is later used as
# a feature -- confirm against the file.
df = pd.read_csv('german_credit_data.csv')

# Drop the columns that are not used as features
df = df.drop(columns=['Sex', 'Job', 'Credit amount'])

# Replace each text column with integer codes via label encoding.
# One encoder is re-fitted per column, so after the loop `le` only holds the
# classes of the last column processed ('Purpose').
le = LabelEncoder()
for column in ('Housing', 'Risk', 'Saving accounts', 'Checking account', 'Purpose'):
    df[column] = le.fit_transform(df[column])

# Split the data into features and target.
# The target is selected by name rather than by position so that the split
# stays correct even if the column order of the CSV changes (a positional
# split would then leak 'Risk' into the features or pick the wrong target).
target_column = 'Risk'
X = df.drop(columns=[target_column])
y = df[target_column]

# Split the data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers: all three ensembles share the same size and seed
ensemble_params = dict(n_estimators=100, random_state=42)
rf = RandomForestClassifier(**ensemble_params)
et = ExtraTreesClassifier(**ensemble_params)
rotf = RotationForestClassifier(**ensemble_params)

# Fit every model on the training split (same order as they were defined)
for model in (rf, et, rotf):
    model.fit(X_train, y_train)

# Predict labels for the held-out test split
rf_pred, et_pred, rotf_pred = (model.predict(X_test) for model in (rf, et, rotf))

# Evaluate model performance
def report_metrics(header, y_true, y_pred):
    """Print a header line followed by four classification metrics.

    Accuracy, precision, recall and F1 are computed with the scikit-learn
    metric functions called with their default arguments and printed with
    four decimal places.

    Parameters
    ----------
    header : str
        Line printed before the metrics (printed exactly as given).
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.
    """
    print(header)
    print('Accuracy Score: {:.4f}'.format(accuracy_score(y_true, y_pred)))
    print('Precision Score: {:.4f}'.format(precision_score(y_true, y_pred)))
    print('Recall Score: {:.4f}'.format(recall_score(y_true, y_pred)))
    print('F1 Score: {:.4f}'.format(f1_score(y_true, y_pred)))


# One report per model; the leading '\n' on later headers separates the
# reports with a blank line.
for header, predictions in [
    ('Rotation Forest: ', rotf_pred),
    ('\nRandom Forest: ', rf_pred),
    ('\nExtra Trees: ', et_pred),
]:
    report_metrics(header, y_test, predictions)

Rotation Forest: 
Accuracy Score: 0.7100
Precision Score: 0.8168
Recall Score: 0.7589
F1 Score: 0.7868

Random Forest: 
Accuracy Score: 0.7600
Precision Score: 0.8079
Recall Score: 0.8652
F1 Score: 0.8356

Extra Trees: 
Accuracy Score: 0.7550
Precision Score: 0.7987
Recall Score: 0.8723
F1 Score: 0.8339


In [2]:
import pandas as pd
from sklearn.model_selection import cross_validate, KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, \
    VotingClassifier, StackingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector as selector
from rotation_forest import RotationForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load the dataset
# NOTE(review): a saved row-index column in the CSV (usually read back as
# 'Unnamed: 0') would stay in `df` and end up among the features -- confirm.
df = pd.read_csv('german_credit_data.csv')

# Drop the columns that are not used as features
unused_columns = ['Sex', 'Job', 'Credit amount']
df = df.drop(columns=unused_columns)

# Convert the remaining text columns to integer codes using label encoding.
# The single encoder is re-fitted for every column in turn.
le = LabelEncoder()
encoded_columns = ['Housing', 'Risk', 'Saving accounts', 'Checking account', 'Purpose']
for column in encoded_columns:
    df[column] = le.fit_transform(df[column])

# Split into features and target. 'Risk' is selected by name instead of by
# position so the split does not depend on the column order of the CSV.
target_column = 'Risk'
X = df.drop(columns=[target_column])
y = df[target_column]

# Define the classifiers: each ensemble gets the same size and seed.
# NOTE(review): n_estimators is 10 here but 100 in the hold-out experiment
# of the previous cell, so the two sets of scores are not directly
# comparable -- confirm this is intentional.
shared_kwargs = {'n_estimators': 10, 'random_state': 42}
rotation_forest = RotationForestClassifier(**shared_kwargs)
random_forest = RandomForestClassifier(**shared_kwargs)
extra_trees = ExtraTreesClassifier(**shared_kwargs)

# Define the cross-validation: 10 shuffled folds with a fixed seed.
# NOTE(review): KFold does not stratify by class; consider StratifiedKFold
# if the classes are imbalanced.
cv = KFold(n_splits=10, shuffle=True, random_state=42)

# Define the scoring metrics passed to cross_validate
scoring = ['accuracy', 'f1', 'precision', 'recall']

# Cross-validate each classifier and print the mean of every metric
candidates = {
    'Rotation Forest': rotation_forest,
    'Random Forest': random_forest,
    'Extra Trees': extra_trees,
}

for name, clf in candidates.items():
    results = cross_validate(clf, X, y, cv=cv, scoring=scoring)

    # Average the per-fold test scores, in the order they are reported
    accuracy, f1, precision, recall = (
        results[key].mean()
        for key in ('test_accuracy', 'test_f1', 'test_precision', 'test_recall')
    )

    # One print call; sep='\n' puts every item on its own line
    print(
        f'{name}:',
        f'Accuracy: {accuracy:.4f}',
        f'F1 Score: {f1:.4f}',
        f'Precision: {precision:.4f}',
        f'Recall: {recall:.4f}',
        '-' * 50,
        sep='\n',
    )

Rotation Forest:
Accuracy: 0.6630
F1 Score: 0.7561
Precision: 0.7661
Recall: 0.7488
--------------------------------------------------
Random Forest:
Accuracy: 0.6990
F1 Score: 0.7864
Precision: 0.7807
Recall: 0.7948
--------------------------------------------------
Extra Trees:
Accuracy: 0.7030
F1 Score: 0.7890
Precision: 0.7821
Recall: 0.7972
--------------------------------------------------
