In [34]:
# IMPORT LIBRIES
import pandas as pd 
import numpy as np 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.model_selection import cross_val_score
import optuna
from sklearn.model_selection import GridSearchCV


In [8]:
# IMPORT DATASET
df = pd.read_csv('https://raw.githubusercontent.com/rohitmande-inttrvu/finance_loan_approval/refs/heads/main/Finance.csv')
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [9]:
# DROP LOAD_ID
df.drop(columns=['Loan_ID'],inplace=True)

In [10]:
df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)
  df['Loan_Status'].replace({'Y':1,'N':0},inplace=True)


In [31]:

X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['Loan_Status']),df['Loan_Status'],test_size=0.2)
# CATEGORICAL COLUMNS AND NUMERICAL COLUMNS
cat_cols = []
num_cols = []
columns = X_train.columns
for col in columns:
    if df[col].nunique()<5:
        cat_cols.append(col)
    else:
        num_cols.append(col)
print(f"Categorical cols : {cat_cols}")
print(f"Numerical cols : {num_cols}")

# encoding
oe = OrdinalEncoder()
X_train[cat_cols] = oe.fit_transform(X_train[cat_cols])
X_test[cat_cols] = oe.transform(X_test[cat_cols])

# iterative imputer
iterativeimputer = IterativeImputer()
X_train = iterativeimputer.fit_transform(X_train)
X_test = iterativeimputer.transform(X_test)

X_train = pd.DataFrame(X_train,columns=columns)
X_test = pd.DataFrame(X_test,columns=columns)

# scaling
scaler = RobustScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

Categorical cols : ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History', 'Property_Area']
Numerical cols : ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']


In [32]:
# create objective function
def objective(trial):
    # define parameters
    n_estimator = trial.suggest_int('n_estimator',50,500)
    learning_rate = trial.suggest_float('learning_rate',0.001,0.1)

    # adaboost model
    abc = AdaBoostClassifier(n_estimators=n_estimator,learning_rate=learning_rate)
    score = cross_val_score(abc,X_train,y_train,cv=3,scoring='accuracy').mean()

    return score 

In [33]:
# create a study
study = optuna.create_study(direction='maximize')
study.optimize(objective,n_trials=50)

[I 2025-04-13 17:58:33,927] A new study created in memory with name: no-name-092a1792-0e25-4686-b08e-a0ab1a8716c1
[I 2025-04-13 17:58:34,617] Trial 0 finished with value: 0.8004264551847973 and parameters: {'n_estimator': 62, 'learning_rate': 0.04944769177058639}. Best is trial 0 with value: 0.8004264551847973.
[I 2025-04-13 17:58:36,170] Trial 1 finished with value: 0.8004264551847973 and parameters: {'n_estimator': 137, 'learning_rate': 0.03568500438232562}. Best is trial 0 with value: 0.8004264551847973.
[I 2025-04-13 17:58:38,921] Trial 2 finished with value: 0.8024589755100004 and parameters: {'n_estimator': 259, 'learning_rate': 0.05226514249095221}. Best is trial 2 with value: 0.8024589755100004.
[I 2025-04-13 17:58:44,236] Trial 3 finished with value: 0.8024589755100004 and parameters: {'n_estimator': 499, 'learning_rate': 0.01805760515851072}. Best is trial 2 with value: 0.8024589755100004.
[I 2025-04-13 17:58:48,379] Trial 4 finished with value: 0.8024589755100004 and paramet

In [40]:
#CONFIGURE EXPERIMENT
import mlflow
import dagshub
dagshub.init(repo_owner='yogibaba7', repo_name='loan_approval_prediction', mlflow=True)
# set tracking uri
mlflow.set_tracking_uri('https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/')
# create a experiment
mlflow.set_experiment('exp5_tuneadaboost')

2025/04/13 18:25:20 INFO mlflow.tracking.fluent: Experiment with name 'exp5_tuneadaboost' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/951428d5324c4229a978da9f91415385', creation_time=1744548921203, experiment_id='4', last_update_time=1744548921203, lifecycle_stage='active', name='exp5_tuneadaboost', tags={}>

In [None]:
# gridsearchcv
with mlflow.start_run(nested=True):
    # define parameters
    params = {
        'n_estimators':[50,60,70,80,90,100,120,150,170,200,230,250],
        'learning_rate':[0.01,0.03,0.05,0.07,0.09]
    }
    abc = AdaBoostClassifier()

    gridsearch = GridSearchCV(abc,param_grid=params)
    gridsearch.fit(X_train,y_train)
    best_params = gridsearch.best_params_
    best_score = gridsearch.best_score_


 



🏃 View run dashing-wasp-707 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4/runs/382a08f55dc84b5fb2fe1cdd35a0c3a8
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4


AttributeError: module 'mlflow.sklearn' has no attribute 'log_params'

In [42]:
# gridsearchcv
with mlflow.start_run(nested=True):
    mlflow.sklearn.log_model(gridsearch,'adaboostclassifier')
    mlflow.log_params(best_params)
    mlflow.log_metric('accuracy',best_score)

    params = gridsearch.cv_results_['params']
    scores = gridsearch.cv_results_['mean_test_score']
    for i in range(len(scores)):
        with mlflow.start_run(nested=True):
            mlflow.log_params(params[i])
            mlflow.log_metric('accuracy',scores[i])
            
    print(f"best score : {gridsearch.best_score_}")
    print(f"best params : {gridsearch.best_params_}")
    



🏃 View run crawling-zebra-524 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4/runs/33956dd2e4c540d79d53bd013473554a
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4
🏃 View run overjoyed-bass-97 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4/runs/aa03a3dfb4db464caa1fea3b4ea25ae2
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4
🏃 View run capable-eel-818 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4/runs/f0205a1c2dae493282fcf0e9aac7dc4f
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4
🏃 View run adorable-dog-411 at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4/runs/1df31f17e47a4604998d68fb8f3d4ce8
🧪 View experiment at: https://dagshub.com/yogibaba7/loan_approval_prediction.mlflow/#/experiments/4
🏃 View