Import libraries

In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from scipy.stats import chi2_contingency
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report,precision_recall_fscore_support
import warnings
import os


Load the dataset

In [39]:
a1 = pd.read_excel(r'D:\CampusX\DSMP 2.0\credit_risk_pipeline\data\raw\case_study1.xlsx')
a2 = pd.read_excel(r'D:\CampusX\DSMP 2.0\credit_risk_pipeline\data\raw\case_study2.xlsx')

df1 = a1.copy()
df2 = a2.copy()

In [40]:
df1.shape

(51336, 26)

Remove nulls

In [41]:
#row-wise removal of null value from df1

df1 = df1.loc[df1['Age_Oldest_TL'] != -99999]
df1.shape

(51296, 26)

In [42]:
df2.shape

(51336, 62)

In [43]:
#column-wise removal of null value from df2

columns_to_be_removed = []
for i in df2.columns:
    if df2.loc[df2[i] == -99999].shape[0] > 10000:
        columns_to_be_removed.append(i)

df2 = df2.drop(columns_to_be_removed, axis=1)
df2.shape

(51336, 54)

In [44]:
#row-wise removal of null value from df1

for i in df2.columns:
    df2 = df2.loc[df2[i] != -99999]

df2.isna().sum()
df1.isna().sum()

PROSPECTID              0
Total_TL                0
Tot_Closed_TL           0
Tot_Active_TL           0
Total_TL_opened_L6M     0
Tot_TL_closed_L6M       0
pct_tl_open_L6M         0
pct_tl_closed_L6M       0
pct_active_tl           0
pct_closed_tl           0
Total_TL_opened_L12M    0
Tot_TL_closed_L12M      0
pct_tl_open_L12M        0
pct_tl_closed_L12M      0
Tot_Missed_Pmnt         0
Auto_TL                 0
CC_TL                   0
Consumer_TL             0
Gold_TL                 0
Home_TL                 0
PL_TL                   0
Secured_TL              0
Unsecured_TL            0
Other_TL                0
Age_Oldest_TL           0
Age_Newest_TL           0
dtype: int64

Merger

In [45]:
#checking common column names
for i in list(df1.columns):
    for i in list(df2.columns):
        print(i)

PROSPECTID
time_since_recent_payment
num_times_delinquent
max_recent_level_of_deliq
num_deliq_6mts
num_deliq_12mts
num_deliq_6_12mts
num_times_30p_dpd
num_times_60p_dpd
num_std
num_std_6mts
num_std_12mts
num_sub
num_sub_6mts
num_sub_12mts
num_dbt
num_dbt_6mts
num_dbt_12mts
num_lss
num_lss_6mts
num_lss_12mts
recent_level_of_deliq
tot_enq
CC_enq
CC_enq_L6m
CC_enq_L12m
PL_enq
PL_enq_L6m
PL_enq_L12m
time_since_recent_enq
enq_L12m
enq_L6m
enq_L3m
MARITALSTATUS
EDUCATION
AGE
GENDER
NETMONTHLYINCOME
Time_With_Curr_Empr
pct_of_active_TLs_ever
pct_opened_TLs_L6m_of_L12m
pct_currentBal_all_TL
CC_Flag
PL_Flag
pct_PL_enq_L6m_of_L12m
pct_CC_enq_L6m_of_L12m
pct_PL_enq_L6m_of_ever
pct_CC_enq_L6m_of_ever
HL_Flag
GL_Flag
last_prod_enq2
first_prod_enq2
Credit_Score
Approved_Flag
PROSPECTID
time_since_recent_payment
num_times_delinquent
max_recent_level_of_deliq
num_deliq_6mts
num_deliq_12mts
num_deliq_6_12mts
num_times_30p_dpd
num_times_60p_dpd
num_std
num_std_6mts
num_std_12mts
num_sub
num_sub_6mts
num

In [46]:
#Merge the two dataframe, inner join so that no null is present
df = pd.merge(df1,df2, how ='inner', left_on = ['PROSPECTID'], right_on = ['PROSPECTID'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42064 entries, 0 to 42063
Data columns (total 79 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   PROSPECTID                  42064 non-null  int64  
 1   Total_TL                    42064 non-null  int64  
 2   Tot_Closed_TL               42064 non-null  int64  
 3   Tot_Active_TL               42064 non-null  int64  
 4   Total_TL_opened_L6M         42064 non-null  int64  
 5   Tot_TL_closed_L6M           42064 non-null  int64  
 6   pct_tl_open_L6M             42064 non-null  float64
 7   pct_tl_closed_L6M           42064 non-null  float64
 8   pct_active_tl               42064 non-null  float64
 9   pct_closed_tl               42064 non-null  float64
 10  Total_TL_opened_L12M        42064 non-null  int64  
 11  Tot_TL_closed_L12M          42064 non-null  int64  
 12  pct_tl_open_L12M            42064 non-null  float64
 13  pct_tl_closed_L12M          420

In [47]:
df.isna().sum().sum()

0

## Hypothesis testing
 Are Martial Status vs Approved Flag are associated?

1.H0 : Null Hypothesis - not associated

2.H1 : Alternate Hypothesis -associated

3.Alpha(assumed 5%) - significance level/strictness level

less risky project = high alpha
more risky project = less alpha

4.Confidence interval = 1-alpha

5.Calculate the evidence against H0
p-value
calculate using tests:

    A. Chisquare - Cat Vs Cat

    B. T-test - Cat Vs Num(only 2 cat) -> Age Vs Approved Flag(P1 & P2)

    C. Annova - Cat Vs Num(>= 3 cat) -> Age Vs Approved Flag(P1, P2, P3 & P4)

6.p-value <= alpha
    Reject H0

p-value > alpha 
    Fail to reject

In [48]:
#check how many columns are categorical
for i in df.columns:
    if df[i].dtype == 'object':
        print(i)

MARITALSTATUS
EDUCATION
GENDER
last_prod_enq2
first_prod_enq2
Approved_Flag


In [49]:
#Chi-square test
for i in ['MARITALSTATUS','EDUCATION','last_prod_enq2','first_prod_enq2']:
    chi2, pval, _, _, = chi2_contingency(pd.crosstab(df[i],df['Approved_Flag']))
    print(i, '---', pval)

MARITALSTATUS --- 3.578180861038862e-233
EDUCATION --- 2.6942265249737532e-30
last_prod_enq2 --- 0.0
first_prod_enq2 --- 7.84997610555419e-287


In [50]:
#Since all the categorical features have pval <= 0.05 we reject null hypothesis, it means they are associated.

In [51]:
#check how many columns are numerical
numeric_columns = []
for i in df.columns:
    if df[i].dtype != 'object' and i not in ['PROSPECTID', 'Approved_Flag']:
        numeric_columns.append(i)

print(len(numeric_columns))

72


# Multicollinearity Vs Correlation

Multicollinarity -  Predictability of each other features by other features

Correlation is specififc to linear relationship between columns

In convex function(y=x2), correlation gives misleading values

### Multicollinearity

Happens when two or more independent variables are associated/associated

Problems with multicollinearity -
    1.Interpretation of IV goes wrong 
    2.Coefficient of IV become misleading

It beacomes difficult to reward/penalize the components

### Variance Inflation Factor(VIF)

It uses to check multicollinearity

Used to identify multicollinearity among IVs

VIF = 1/(1-sqr(Rsquare))

VIF->   1 - no multicollinearity,
        1 to 5 - low multicollinearity,
        5 to 10 - moderate multicollinearity,
        > 10 - high multicollinearity

In [52]:
#VIF sequentially check
vif_data = df[numeric_columns]
total_columns = vif_data.shape[1]
columns_to_be_kept = []
column_index = 0

for i in range (0,total_columns):

    vif_value = variance_inflation_factor(vif_data, column_index)
    print(column_index,'---',vif_value)

    if vif_value <= 6:
        columns_to_be_kept.append(numeric_columns[i])
        column_index = column_index + 1

    else:
        vif_data = vif_data.drop([numeric_columns[i]],axis=1)

print(len(columns_to_be_kept))

  vif = 1. / (1. - r_squared_i)


0 --- inf


  vif = 1. / (1. - r_squared_i)


0 --- inf
0 --- 11.320180023967996
0 --- 8.363698035000336
0 --- 6.520647877790928
0 --- 5.149501618212625
1 --- 2.611111040579735


  vif = 1. / (1. - r_squared_i)


2 --- inf
2 --- 1788.7926256209232
2 --- 8.601028256477228
2 --- 3.8328007921530785
3 --- 6.099653381646731
3 --- 5.5813520096427585
4 --- 1.9855843530987785


  vif = 1. / (1. - r_squared_i)


5 --- inf
5 --- 4.809538302819343
6 --- 23.270628983464636
6 --- 30.595522588100053
6 --- 4.384346405965587
7 --- 3.064658415523423
8 --- 2.898639771299251
9 --- 4.377876915347322
10 --- 2.2078535836958433
11 --- 4.916914200506864
12 --- 5.214702030064725
13 --- 3.3861625024231476
14 --- 7.840583309478997
14 --- 5.255034641721438


  vif = 1. / (1. - r_squared_i)


15 --- inf
15 --- 7.380634506427238
15 --- 1.4210050015175733
16 --- 8.083255010190316
16 --- 1.6241227524040112
17 --- 7.257811920140003
17 --- 15.59624383268298
17 --- 1.8258570471324314
18 --- 1.5080839450032664
19 --- 2.172088834824577
20 --- 2.623397553527229
21 --- 2.2959970812106176
22 --- 7.360578319196439
22 --- 2.160238777310255
23 --- 2.8686288267891467
24 --- 6.458218003637272
24 --- 2.8474118865638256
25 --- 4.753198156284083
26 --- 16.227354755948223
26 --- 6.424377256363872
26 --- 8.887080381808687
26 --- 2.3804746142952653
27 --- 8.609513476514548
27 --- 13.06755093547673
27 --- 3.500040056654654
28 --- 1.908795587481377
29 --- 17.006562234161628
29 --- 10.730485153719197
29 --- 2.3538497522950275
30 --- 22.104855915136433
30 --- 2.7971639638512906
31 --- 3.424171203217696
32 --- 10.175021454450912
32 --- 6.408710354561296
32 --- 1.0011511962625628
33 --- 3.069197305397274
34 --- 2.8091261600643724
35 --- 20.249538381980678
35 --- 15.864576541593774
35 --- 1.83316497405

In [53]:
#Check Anova for columns to be kept

from scipy.stats import f_oneway

columns_to_be_kept_numerical = []

for i in columns_to_be_kept:
    a = list(df[i])
    b = list(df['Approved_Flag'])

    group_P1 = [value for value, group in zip(a, b) if group == 'P1']
    group_P2 = [value for value, group in zip(a, b) if group == 'P2']
    group_P3 = [value for value, group in zip(a, b) if group == 'P3']
    group_P4 = [value for value, group in zip(a, b) if group == 'P4']

    f_statistic, p_value = f_oneway(group_P1, group_P2, group_P3, group_P4)

    if p_value <= 0.05:
        columns_to_be_kept_numerical.append(i)
        
print(len(columns_to_be_kept_numerical))

37


# Listing all the final features

In [54]:
features = columns_to_be_kept_numerical + ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2', 'Approved_Flag']
df = df[features]
df.shape

(42064, 43)

# Label Encoding for the categorical feature

In [55]:
df['MARITALSTATUS'].unique()    
df['EDUCATION'].unique()
df['GENDER'].unique()
df['last_prod_enq2'].unique()
df['first_prod_enq2'].unique()

array(['PL', 'ConsumerLoan', 'others', 'AL', 'HL', 'CC'], dtype=object)

In [56]:
# Convert EDUCATION column to ordinal values
df.loc[df['EDUCATION'] == 'SSC', 'EDUCATION'] = 1
df.loc[df['EDUCATION'] == '12TH', 'EDUCATION'] = 2
df.loc[df['EDUCATION'] == 'GRADUATE', 'EDUCATION'] = 3
df.loc[df['EDUCATION'] == 'UNDER GRADUATE', 'EDUCATION'] = 3
df.loc[df['EDUCATION'] == 'POST-GRADUATE', 'EDUCATION'] = 4
df.loc[df['EDUCATION'] == 'OTHERS', 'EDUCATION'] = 1
df.loc[df['EDUCATION'] == 'PROFESSIONAL', 'EDUCATION'] = 3

# Check the value counts after conversion
df['EDUCATION'].value_counts()
df['EDUCATION'] = df['EDUCATION'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42064 entries, 0 to 42063
Data columns (total 43 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   pct_tl_open_L6M            42064 non-null  float64
 1   pct_tl_closed_L6M          42064 non-null  float64
 2   Tot_TL_closed_L12M         42064 non-null  int64  
 3   pct_tl_closed_L12M         42064 non-null  float64
 4   Tot_Missed_Pmnt            42064 non-null  int64  
 5   CC_TL                      42064 non-null  int64  
 6   Home_TL                    42064 non-null  int64  
 7   PL_TL                      42064 non-null  int64  
 8   Secured_TL                 42064 non-null  int64  
 9   Unsecured_TL               42064 non-null  int64  
 10  Other_TL                   42064 non-null  int64  
 11  Age_Oldest_TL              42064 non-null  int64  
 12  Age_Newest_TL              42064 non-null  int64  
 13  time_since_recent_payment  42064 non-null  int

In [57]:
df_encoded = pd.get_dummies(df, columns = ['MARITALSTATUS','GENDER','last_prod_enq2','first_prod_enq2'])
df_encoded.shape

(42064, 55)

# Machine Learning model fitting

### Accuracy, Precision, Recall, F1 Score


Accuracy -> Out of total values, how many are correctly predicted.
Accuracy = (TP + TN) / (TP + FN + FP + TN)

Recall -> Out of total actual values for a class, how many are correctlyy predicted.
Recall = (TP) / (TP + FN)  or  (TN) / (FP + TN)

Precision -> Out of my total predicted value for a class, how many are correctly predicted.
Precision = (TP) / (TP + FP)  or  (TN) / (FN + TN)

F1 Score -> (2PR)/(P+R)

### 1.Random Forest

In [58]:
y = df_encoded['Approved_Flag']
x = df_encoded.drop(['Approved_Flag'],axis = 1)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state = 42)

rfc = RandomForestClassifier(n_estimators= 200, random_state= 42)

rfc.fit(x_train, y_train)

y_pred = rfc.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print()
print(f'Accuracy: {accuracy}')
print()
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred) 


for i,v in enumerate(['p1','p2','p3','p4']):
    print(f'Class {v}')
    print(f'Precision: {precision[i]}')
    print(f'Recall: {recall[i]}')
    print(f'F1 score: {f1_score[i]}')
    print()


Accuracy: 0.7636990372043266

Class p1
Precision: 0.8370457209847597
Recall: 0.7041420118343196
F1 score: 0.7648634172469202

Class p2
Precision: 0.7957519116397621
Recall: 0.9282457879088206
F1 score: 0.856907593778591

Class p3
Precision: 0.4423380726698262
Recall: 0.21132075471698114
F1 score: 0.28600612870275793

Class p4
Precision: 0.7178502879078695
Recall: 0.7269193391642371
F1 score: 0.7223563495895703



### 2.XGBoost

In [59]:
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

xgbc = xgb.XGBClassifier(objective = 'multi:softmax', num_class=4)

y = df_encoded['Approved_Flag']
x = df_encoded.drop(['Approved_Flag'],axis = 1)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state = 42)


xgbc.fit(x_train, y_train)

y_pred = xgbc.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print()
print(f'Accuracy: {accuracy}')
print()
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred) 


for i,v in enumerate(['p1','p2','p3','p4']):
    print(f'Class {v}')
    print(f'Precision: {precision[i]}')
    print(f'Recall: {recall[i]}')
    print(f'F1 score: {f1_score[i]}')
    print()


Accuracy: 0.7783192677998336

Class p1
Precision: 0.823906083244397
Recall: 0.7613412228796844
F1 score: 0.7913890312660175

Class p2
Precision: 0.8255418233924413
Recall: 0.913577799801784
F1 score: 0.8673315769665035

Class p3
Precision: 0.4756380510440835
Recall: 0.30943396226415093
F1 score: 0.37494284407864653

Class p4
Precision: 0.7342386032977691
Recall: 0.7356656948493683
F1 score: 0.7349514563106796



### 3.Decision Tree

In [60]:
from sklearn.tree import DecisionTreeClassifier

y = df_encoded['Approved_Flag']
x = df_encoded.drop(['Approved_Flag'], axis = 1)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 0.2, random_state=42)

dt_model =DecisionTreeClassifier(max_depth=20, min_samples_split=10)
dt_model.fit(x_train, y_train)
y_pred = dt_model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print()
print(f'Accuracy: {accuracy: .2f}')
print()

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy:  0.71

Class p1:
Precision: 0.7278978388998035
Recall: 0.7307692307692307
F1 Score: 0.7293307086614174

Class p2:
Precision: 0.8117097840886986
Recall: 0.8271555996035679
F1 Score: 0.8193599057529943

Class p3:
Precision: 0.34365079365079365
Recall: 0.3267924528301887
F1 Score: 0.33500967117988395

Class p4:
Precision: 0.6509054325955734
Recall: 0.6287657920310982
F1 Score: 0.6396440929312902



XGboost is giving me best results
We will further finetune it

# Apply standard scaler

In [61]:
from sklearn.preprocessing import StandardScaler

columns_to_be_scaled = ['Age_Oldest_TL','Age_Newest_TL','time_since_recent_payment','max_recent_level_of_deliq','recent_level_of_deliq','time_since_recent_enq','NETMONTHLYINCOME','Time_With_Curr_Empr']

for i in columns_to_be_scaled:
    column_data = df_encoded[i].values.reshape(-1, 1)
    scaler = StandardScaler()
    scaled_column =scaler.fit_transform(column_data)
    df_encoded[i] = scaled_column

In [62]:
import xgboost as xgb 
from sklearn.preprocessing import LabelEncoder

xgbc = xgb.XGBClassifier(objective='multi:softmax', num_class=4)

y = df_encoded['Approved_Flag']
x = df_encoded. drop ( ['Approved_Flag'], axis = 1 )

y_encoded = LabelEncoder().fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

xgbc.fit(x_train, y_train)
y_pred = xgbc.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 0.78
Class p1:
Precision: 0.823906083244397
Recall: 0.7613412228796844
F1 Score: 0.7913890312660175

Class p2:
Precision: 0.8255418233924413
Recall: 0.913577799801784
F1 Score: 0.8673315769665035

Class p3:
Precision: 0.4756380510440835
Recall: 0.30943396226415093
F1 Score: 0.37494284407864653

Class p4:
Precision: 0.7342386032977691
Recall: 0.7356656948493683
F1 Score: 0.7349514563106796



No improvement in metrics

In [63]:
# Hyperparameter tuning in xgboost
from sklearn.model_selection import GridSearchCV
x_train, x_test, y_train, y_test = train_test_split(x,y_encoded, test_size = 0.2, random_state = 42)

#Define the XGBClassifer with the initial set of parameter tuning
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=4)

#Define the parameter grid for hyperparameter tuning

param_grid = {
    'n_estimator': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2]
}
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(x_train, y_train)

#Print the beat hyperparameters
print('Best Hyperparameters',grid_search.best_params_)

#Evaluate the model with the best hyperparameters on the test set
best_model = grid_search.best_estimator_
accuracy = best_model.score(x_test, y_test)

Parameters: { "n_estimator" } are not used.



Best Hyperparameters {'learning_rate': 0.2, 'max_depth': 5, 'n_estimator': 50}


In [64]:
# Hyperparameter tuning for xgboost (Used in the session)

# Define the hyperparameter grid
param_grid = {
 'colsample_bytree': [0.1, 0.3, 0.5, 0.7, 0.9],
 'learning_rate'   : [0.001, 0.01, 0.1, 1],
 'max_depth'       : [3, 5, 8, 10],
 'alpha'           : [1, 10, 100],
 'n_estimators'    : [10,50,100]
}

index = 0

answers_grid = {
     'combination'       :[],
     'train_Accuracy'    :[],
     'test_Accuracy'     :[],
     'colsample_bytree'  :[],
     'learning_rate'     :[],
     'max_depth'         :[],
     'alpha'             :[],
     'n_estimators'      :[]

     }


 # Loop through each combination of hyperparameters
for colsample_bytree in param_grid['colsample_bytree']:
   for learning_rate in param_grid['learning_rate']:
     for max_depth in param_grid['max_depth']:
       for alpha in param_grid['alpha']:
           for n_estimators in param_grid['n_estimators']:
            
               index = index + 1
             
               # Define and train the XGBoost model
               model = xgb.XGBClassifier(objective='multi:softmax',  
                                        num_class=4,
                                        colsample_bytree = colsample_bytree,
                                        learning_rate = learning_rate,
                                        max_depth = max_depth,
                                        alpha = alpha,
                                        n_estimators = n_estimators)
               
       
                     
               y = df_encoded['Approved_Flag']
               x = df_encoded. drop ( ['Approved_Flag'], axis = 1 )

               label_encoder = LabelEncoder()
               y_encoded = label_encoder.fit_transform(y)


               x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)


               model.fit(x_train, y_train)
  

       
               # Predict on training and testing sets
               y_pred_train = model.predict(x_train)
               y_pred_test = model.predict(x_test)
       
       
               # Calculate train and test results
              
               train_accuracy =  accuracy_score (y_train, y_pred_train)
               test_accuracy  =  accuracy_score (y_test , y_pred_test)
              
              
       
               # Include into the lists
               answers_grid ['combination']   .append(index)
               answers_grid ['train_Accuracy']    .append(train_accuracy)
               answers_grid ['test_Accuracy']     .append(test_accuracy)
               answers_grid ['colsample_bytree']   .append(colsample_bytree)
               answers_grid ['learning_rate']      .append(learning_rate)
               answers_grid ['max_depth']          .append(max_depth)
               answers_grid ['alpha']              .append(alpha)
               answers_grid ['n_estimators']       .append(n_estimators)
       
       
               # Print results for this combination
               print(f"Combination {index}")
               print(f"colsample_bytree: {colsample_bytree}, learning_rate: {learning_rate}, max_depth: {max_depth}, alpha: {alpha}, n_estimators: {n_estimators}")
               print(f"Train Accuracy: {train_accuracy:.2f}")
               print(f"Test Accuracy : {test_accuracy :.2f}")
               print("-" * 30)








Combination 1
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 2
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 3
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 4
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 5
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train Accuracy: 0.61
Test Accuracy : 0.60
------------------------------
Combination 6
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train Accuracy: 0.61
Test Accu

KeyboardInterrupt: 