In [8]:
# IMPORT LIBRIES
import time
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.preprocessing import OrdinalEncoder

# Basic classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Ensemble classifiers
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, VotingClassifier, StackingClassifier

# Boosting libraries
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier



In [9]:
# IMPORT DATASET
df = pd.read_csv('https://raw.githubusercontent.com/rohitmande-inttrvu/finance_loan_approval/refs/heads/main/Finance.csv')
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [10]:
# DROP LOAD_ID
df.drop(columns=['Loan_ID'],inplace=True)

In [11]:
df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)
  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)


In [12]:
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'Gaussian': GaussianNB(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Bagging': BaggingClassifier(),
    'Extra Trees': ExtraTreesClassifier(),
    'XGBoost': XGBClassifier(),
    'LightGBM': LGBMClassifier()
}

In [13]:
#CONFIGURE EXPERIMENT
import mlflow
import dagshub
dagshub.init(repo_owner='yogibaba7', repo_name='loan_approval_prediction', mlflow=True)
# set tracking uri
mlflow.set_tracking_uri('https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/')
# create a experiment
mlflow.set_experiment('exp4_bestmodel')

<Experiment: artifact_location='mlflow-artifacts:/95242b81f76f43a8a8964f277cdd7dde', creation_time=1744463603346, experiment_id='3', last_update_time=1744463603346, lifecycle_stage='active', name='exp4_bestmodel', tags={}>

In [14]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

with mlflow.start_run(nested=True,description='this experiment is created for find the  best scaler'):
    X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Loan_Status']),df['Loan_Status'],test_size=0.2,random_state=42)
    # CATEGORICAL COLUMNS AND NUMERICAL COLUMNS
    cat_cols = []
    num_cols = []
    columns = X_train.columns
    for col in columns:
        if df[col].nunique()<5:
            cat_cols.append(col)
        else:
            num_cols.append(col)
    print(f"Categorical cols : {cat_cols}")
    print(f"Numerical cols : {num_cols}")

    # encoding
    oe = OrdinalEncoder()
    X_train[cat_cols] = oe.fit_transform(X_train[cat_cols])
    X_test[cat_cols] = oe.transform(X_test[cat_cols])

    # iterative imputer
    iterativeimputer = IterativeImputer()
    X_train = iterativeimputer.fit_transform(X_train)
    X_test = iterativeimputer.transform(X_test)

    X_train = pd.DataFrame(X_train,columns=columns)
    X_test = pd.DataFrame(X_test,columns=columns)

    # scaling
    scaler = RobustScaler()
    X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
    X_test[num_cols] = scaler.transform(X_test[num_cols])

    
    for classifier_name,classifier in classifiers.items():
        with mlflow.start_run(nested=True,description=f"in this run {classifier} trained"):

            # log imputer 
            mlflow.log_param('imputer','iterativeimputer')
            mlflow.log_param('scaler',scaler)
            mlflow.log_param(classifier_name,classifier)

            # model training
            classifier.fit(X_train,y_train)
            y_pred = classifier.predict(X_test)

            
            # log model
            mlflow.sklearn.log_model(classifier,classifier_name)

            # model evaluations
            accuracy = accuracy_score(y_test,y_pred)
            precision = precision_score(y_test,y_pred)
            recall = recall_score(y_test,y_pred)
            f1 = f1_score(y_test,y_pred)

            # log metrics 
            mlflow.log_metric('accuracy',accuracy)
            mlflow.log_metric('precision',precision)
            mlflow.log_metric('recall',recall)
            mlflow.log_metric('f1',f1)

            # Save and log the notebook
            import os
            notebook_path = "exp1_baseline_model.ipynb"
            os.system(f"jupyter nbconvert --to notebook --execute --inplace {notebook_path}")
            mlflow.log_artifact(notebook_path)

            
            print(f"accuracy score : {accuracy}")
            print(f"precision score : {precision}")
            print(f"recall score : {recall}")
            print(f"f1 score : {f1}")     
            print("---------------------------------------------------------")

Categorical cols : ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History', 'Property_Area']
Numerical cols : ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy score : 0.7886178861788617
precision score : 0.7596153846153846
recall score : 0.9875
f1 score : 0.8586956521739131
---------------------------------------------------------
üèÉ View run popular-shoat-546 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/beef3c74e3644aea8a44ef5a4d3b0000
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.6666666666666666
precision score : 0.6893203883495146
recall score : 0.8875
f1 score : 0.7759562841530054
---------------------------------------------------------
üèÉ View run beautiful-robin-510 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/44a0d24bf2bd421b9cdc89a3e625797f
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.6991869918699187
precision score : 0.7590361445783133
recall score : 0.7875
f1 score : 0.7730061349693251
---------------------------------------------------------
üèÉ View run efficient-fly-446 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/0d2be137556b4dec996f0856c6bb8940
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7804878048780488
precision score : 0.7676767676767676
recall score : 0.95
f1 score : 0.8491620111731844
---------------------------------------------------------
üèÉ View run rare-snake-443 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/8ff910e929fc4dab8430aa2e406dedc6
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.6260162601626016
precision score : 0.6416666666666667
recall score : 0.9625
f1 score : 0.77
---------------------------------------------------------
üèÉ View run puzzled-grouse-481 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/993805b426224a27b94ca5e68872cc9f
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7804878048780488
precision score : 0.7572815533980582
recall score : 0.975
f1 score : 0.8524590163934426
---------------------------------------------------------
üèÉ View run popular-rat-293 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/b6bc4b72330f4e0bbb041e05b26cd9a3
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7560975609756098
precision score : 0.7551020408163265
recall score : 0.925
f1 score : 0.8314606741573034
---------------------------------------------------------
üèÉ View run bedecked-moose-449 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/0476c967ab9040398578a56e2463b1d5
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7886178861788617
precision score : 0.7596153846153846
recall score : 0.9875
f1 score : 0.8586956521739131
---------------------------------------------------------
üèÉ View run brawny-gnu-589 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/62d060ffc4c749efab208c4e3003f5f2
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7235772357723578
precision score : 0.75
recall score : 0.8625
f1 score : 0.8023255813953488
---------------------------------------------------------
üèÉ View run bouncy-gnu-408 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/cc09bc7eb8164d4382aecae1d315803e
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7804878048780488
precision score : 0.7623762376237624
recall score : 0.9625
f1 score : 0.850828729281768
---------------------------------------------------------
üèÉ View run delightful-asp-902 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/2f55bbd225d84200a0a54b719f706ca7
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3




accuracy score : 0.7723577235772358
precision score : 0.76
recall score : 0.95
f1 score : 0.8444444444444444
---------------------------------------------------------
üèÉ View run abundant-cod-854 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/34a4e6523e314afd9ddcd16bd6518458
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3
[LightGBM] [Info] Number of positive: 342, number of negative: 149
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 406
[LightGBM] [Info] Number of data points in the train set: 491, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.696538 -> initscore=0.830864
[LightGBM] [Info] Start training from score 0.830864




accuracy score : 0.7804878048780488
precision score : 0.7676767676767676
recall score : 0.95
f1 score : 0.8491620111731844
---------------------------------------------------------
üèÉ View run gaudy-koi-621 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/1ce83d20fa4140b4a9fa25fb930f1411
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3
üèÉ View run sedate-perch-475 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3/runs/ccd423c4428f4c178464ab1b62ddb217
üß™ View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/3
