In [17]:
!pip install scikit-learn pandas xgboost lightgbm

Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   --- ------------------------------------ 11.0/124.9 MB 59.8 MB/s eta 0:00:02
   -------- ------------------------------- 25.2/124.9 MB 65.6 MB/s eta 0:00:02
   ------------ --------------------------- 39.3/124.9 MB 66.9 MB/s eta 0:00:02
   ----------------- ---------------------- 54.5/124.9 MB 67.9 MB/s eta 0:00:02
   ---------------------- ----------------- 69.7/124.9 MB 68.7 MB/s eta 0:00:01
   --------------------------- ------------ 84.4/124.9 MB 68.7 MB/s eta 0:00:01
   ------------------------------- -------- 99.4/124.9 MB 68.9 MB/s eta 0:00:01
   ----------------------------------- --- 112.5/124.9


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import xgboost as xgb
import lightgbm as lgb
from sklearn.svm import SVC

In [19]:
dataset = pd.read_csv('../dataset/dataset.csv')

for column in dataset.columns:
    dataset = dataset.dropna(subset = [column])

dataset['Risk_Level_Binary'] = dataset['Risk Level'].map({'High': 1, 'Low': 0})
y = dataset['Risk_Level_Binary']
dataset = dataset.drop(['Risk Level', 'Risk_Level_Binary'], axis =1)


In [20]:

X_train, X_test, y_train, y_test = train_test_split(dataset, y, test_size = .2, random_state = 42)

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [23]:
results = {}
models = {
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'MLP': MLPClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'XGBoost': xgb.XGBClassifier(),
    'LightGBM': lgb.LGBMClassifier(),
    'SVC': SVC(),
}

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    results[name] = {
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
    }
    
    print(f"Model: {name}")
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")
    print("-" * 50)

          

Model: Logistic Regression
Accuracy: 0.9786, Precision: 0.9655, Recall: 0.9767, F1: 0.9711
--------------------------------------------------
Model: KNN
Accuracy: 0.9744, Precision: 0.9545, Recall: 0.9767, F1: 0.9655
--------------------------------------------------
Model: Decision Tree
Accuracy: 0.9744, Precision: 0.9651, Recall: 0.9651, F1: 0.9651
--------------------------------------------------
Model: Random Forest
Accuracy: 0.9872, Precision: 0.9770, Recall: 0.9884, F1: 0.9827
--------------------------------------------------




Model: MLP
Accuracy: 0.9786, Precision: 0.9655, Recall: 0.9767, F1: 0.9711
--------------------------------------------------
Model: Gradient Boosting
Accuracy: 0.9957, Precision: 0.9885, Recall: 1.0000, F1: 0.9942
--------------------------------------------------
Model: XGBoost
Accuracy: 0.9915, Precision: 0.9884, Recall: 0.9884, F1: 0.9884
--------------------------------------------------
[LightGBM] [Info] Number of positive: 378, number of negative: 554
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 271
[LightGBM] [Info] Number of data points in the train set: 932, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.405579 -> initscore=-0.382270
[LightGBM] [Info] Start training from score -0.382270
Model: LightGBM
Accuracy: 0.9957, Precision: 0.9885, 

