In [42]:
# IMPORT LIBRIES
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler,RobustScaler,MaxAbsScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier


In [43]:
df = pd.read_csv('https://raw.githubusercontent.com/rohitmande-inttrvu/finance_loan_approval/refs/heads/main/Finance.csv')
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [44]:
# DROP LOAD_ID
df.drop(columns=['Loan_ID'],inplace=True)

In [45]:
df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)
  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)


In [46]:
#CONFIGURE EXPERIMENT
import mlflow
import dagshub
dagshub.init(repo_owner='yogibaba7', repo_name='loan_approval_prediction', mlflow=True)
# set tracking uri
mlflow.set_tracking_uri('https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/')
# create a experiment
mlflow.set_experiment('exp3_scaling')

<Experiment: artifact_location='mlflow-artifacts:/dfe82e3a2312477987a03ff48b7ea109', creation_time=1744459320293, experiment_id='2', last_update_time=1744459320293, lifecycle_stage='active', name='exp3_scaling', tags={}>

In [47]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

with mlflow.start_run(nested=True,description='this experiment is created for find the  best scaler'):
    X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Loan_Status']),df['Loan_Status'],test_size=0.2,random_state=42)
    # CATEGORICAL COLUMNS AND NUMERICAL COLUMNS
    cat_cols = []
    num_cols = []
    columns = X_train.columns
    for col in columns:
        if df[col].nunique()<5:
            cat_cols.append(col)
        else:
            num_cols.append(col)
    print(f"Categorical cols : {cat_cols}")
    print(f"Numerical cols : {num_cols}")

    # encoding
    oe = OrdinalEncoder()
    X_train[cat_cols] = oe.fit_transform(X_train[cat_cols])
    X_test[cat_cols] = oe.transform(X_test[cat_cols])

    # iterative imputer
    iterativeimputer = IterativeImputer()
    X_train = iterativeimputer.fit_transform(X_train)
    X_test = iterativeimputer.transform(X_test)

    X_train = pd.DataFrame(X_train,columns=columns)
    X_test = pd.DataFrame(X_test,columns=columns)

    scalers = [StandardScaler(),MinMaxScaler(),MaxAbsScaler(),RobustScaler()]
    for scaler in scalers:
        with mlflow.start_run(nested=True,description=f"this run is done by {scaler}"):
            # scaling
            X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
            X_test[num_cols] = scaler.transform(X_test[num_cols])

            # log imputer 
            mlflow.log_param('imputer','iterativeimputer')
            mlflow.log_param('scaler',scaler)

            # model training
            lr = RandomForestClassifier()
            lr.fit(X_train,y_train)
            y_pred = lr.predict(X_test)

            # log model
            mlflow.sklearn.log_model(lr,'LogisticRegression')

            # model evaluations
            accuracy = accuracy_score(y_test,y_pred)
            precision = precision_score(y_test,y_pred)
            recall = recall_score(y_test,y_pred)
            f1 = f1_score(y_test,y_pred)

            # log metrics 
            mlflow.log_metric('accuracy',accuracy)
            mlflow.log_metric('precision',precision)
            mlflow.log_metric('recall',recall)
            mlflow.log_metric('f1',f1)

            # Save and log the notebook
            import os
            notebook_path = "exp1_baseline_model.ipynb"
            os.system(f"jupyter nbconvert --to notebook --execute --inplace {notebook_path}")
            mlflow.log_artifact(notebook_path)

            
            print(f"accuracy score : {accuracy}")
            print(f"precision score : {precision}")
            print(f"recall score : {recall}")
            print(f"f1 score : {f1}")     
            print("---------------------------------------------------------")


Categorical cols : ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History', 'Property_Area']
Numerical cols : ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']




accuracy score : 0.7723577235772358
precision score : 0.7549019607843137
recall score : 0.9625
f1 score : 0.8461538461538461
---------------------------------------------------------
🏃 View run suave-smelt-416 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2/runs/9b227a0c63f340c6942a1930baadc408
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2




accuracy score : 0.7560975609756098
precision score : 0.75
recall score : 0.9375
f1 score : 0.8333333333333334
---------------------------------------------------------
🏃 View run learned-shad-314 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2/runs/e438d3bf54404faa95298b05a2abcb9e
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2




accuracy score : 0.7723577235772358
precision score : 0.7549019607843137
recall score : 0.9625
f1 score : 0.8461538461538461
---------------------------------------------------------
🏃 View run fortunate-kit-944 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2/runs/3af0c44eb95a45098e8d8e0ca28e3b44
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2




accuracy score : 0.7886178861788617
precision score : 0.77
recall score : 0.9625
f1 score : 0.8555555555555555
---------------------------------------------------------
🏃 View run gaudy-hog-113 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2/runs/87028b07940b4a96bbb1223c6dd8c62c
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2
🏃 View run unruly-conch-188 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2/runs/39e839fad46249088335fb3f05cc4456
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/2
