# **Credit Scoring Model**

In [1]:
import pandas as pd

# Load the dataset from 'bank.csv' (path is relative to the notebook's working
# directory) and render it with the notebook's rich display.
df = pd.read_csv('bank.csv')
display(df)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no
4517,57,self-employed,married,tertiary,yes,-3313,yes,yes,unknown,9,may,153,1,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no
4519,28,blue-collar,married,secondary,no,1137,no,no,cellular,6,feb,129,4,211,3,other,no


## Data Preparation

### Preprocessing (One-hot encoding)

In [2]:
# Categorical features that are expanded into one indicator column per category.
categorical_columns = ['job', 'marital', 'education', 'contact', 'month', 'poutcome']
dummy_df = pd.get_dummies(data=df, columns=categorical_columns)


In [3]:
# Encode the binary yes/no columns as 0/1.
# The values are always read from the untouched `df`, not from `dummy_df`
# itself, so re-running this cell gives the same result. Mapping the already
# encoded 0/1 values a second time would turn every entry into NaN, because
# Series.map yields NaN for values that are not keys of the dict.
yes_no_to_int = {'no': 0, 'yes': 1}
for binary_col in ['default', 'housing', 'loan', 'y']:
    dummy_df[binary_col] = df[binary_col].map(yes_no_to_int)

### Data Separation into x and y

In [4]:
# Separate the target column from the features, once for the raw table
# (`*_temp`, original string categories) and once for the encoded table.
target_column = 'y'
x_temp, y_temp = df.drop(columns=target_column), df[target_column]
x, y = dummy_df.drop(columns=target_column), dummy_df[target_column]

### Split into 80% train and 20% test data

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The raw and the encoded tables are
# split with identical options (same size, same seed) so that both calls pick
# the same rows; the raw test features are joined with the predictions
# further down to present them in readable form.
split_options = {'test_size': 0.2, 'random_state': 100}
x_train_temp, x_test_temp, y_train_temp, y_test_temp = train_test_split(x_temp, y_temp, **split_options)
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

## Building the Model

### Training the Model

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the encoded training data.
# A fixed random_state makes the randomised parts of training repeatable, so
# the predictions and the scores computed from them no longer change from run
# to run. The value matches the seed already used for the train/test split.
rf = RandomForestClassifier(random_state=100)
rf.fit(x_train,y_train)

In [7]:
# Number of rows held out for testing.
x_test.shape[0]

905

In [8]:
# Replace the shuffled row labels of both test feature tables with a fresh
# 0..n-1 index; the raw table is later joined to the predictions by index.
x_test = x_test.reset_index(drop=True)
x_test_temp = x_test_temp.reset_index(drop=True)

In [9]:
# Predict a class label for every row of the encoded test features.
y_rf_pred = rf.predict(x_test)

In [10]:
# Show the raw predicted labels (an array of 0/1 values, one per test row).
y_rf_pred

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [11]:
# Wrap the predicted labels in a one-column DataFrame (the column is named 0
# by default) and show it.
prediction = pd.DataFrame(y_rf_pred)
prediction

Unnamed: 0,0
0,0
1,0
2,1
3,0
4,0
...,...
900,0
901,0
902,0
903,0


In [12]:
# Give the single prediction column a readable name.
# NOTE(review): the values are the model's predictions of column `y`; confirm
# that 'Loan Approved' is the right business meaning for that column.
prediction = prediction.set_axis(['Loan Approved'], axis=1)
prediction

Unnamed: 0,Loan Approved
0,0
1,0
2,1
3,0
4,0
...,...
900,0
901,0
902,0
903,0


In [13]:
# Turn the numeric class labels into readable 'no'/'yes' strings.
# The labels are read from `y_rf_pred` rather than from the column being
# overwritten, so the cell can be re-run safely; mapping the already
# converted strings a second time would leave only NaN in the column.
prediction['Loan Approved'] = pd.Series(y_rf_pred, index=prediction.index).map({0:'no',1:'yes'})
prediction

Unnamed: 0,Loan Approved
0,no
1,no
2,yes
3,no
4,no
...,...
900,no
901,no
902,no
903,no


In [14]:
# Put the raw (human-readable) test features next to the predicted labels,
# one column block after the other, then renumber the rows from 0.
frames_side_by_side = [x_test_temp, prediction]
test_table_prediction = pd.concat(frames_side_by_side, axis='columns')
test_table_prediction = test_table_prediction.reset_index(drop=True)
test_table_prediction

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,Loan Approved
0,52,retired,married,secondary,no,424,no,no,cellular,19,nov,143,1,154,3,failure,no
1,40,management,single,unknown,no,838,yes,no,unknown,12,may,619,3,-1,0,unknown,no
2,32,technician,single,tertiary,no,5514,no,no,cellular,22,apr,319,1,182,1,success,yes
3,46,admin.,married,secondary,no,556,yes,yes,unknown,15,may,646,3,-1,0,unknown,no
4,42,entrepreneur,married,tertiary,no,0,no,no,cellular,9,jul,236,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900,50,blue-collar,married,primary,no,8139,yes,no,cellular,18,aug,256,6,-1,0,unknown,no
901,38,self-employed,single,secondary,no,1146,yes,no,unknown,21,may,91,2,-1,0,unknown,no
902,50,housemaid,married,primary,no,395,yes,no,unknown,15,may,419,2,-1,0,unknown,no
903,42,housemaid,married,primary,no,83,no,yes,cellular,7,aug,184,1,-1,0,unknown,no


## Checking accuracy and precision

In [15]:
from sklearn.metrics import accuracy_score,precision_score

In [16]:
# Score the predictions against the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_rf_pred)
precision = precision_score(y_true=y_test, y_pred=y_rf_pred)

# Print each metric on its own line as "<label> <value>".
for metric_label, metric_value in (("Accuracy:", accuracy), ("Precision:", precision)):
    print(metric_label, metric_value)

Accuracy: 0.907182320441989
Precision: 0.6410256410256411
