In [1]:
import warnings
warnings.simplefilter('ignore')

## Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

## Loading Data

In [3]:
# Read the training set from the CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer='train.csv')

In [4]:
# (rows, columns) of the raw frame as loaded.
df.shape

(614, 13)

In [5]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [6]:
# Loan_ID is a row identifier, not a feature, so remove it.
df = df.drop(columns=['Loan_ID'])

In [7]:
# Category frequencies before imputation (missing values are not counted).
df['Self_Employed'].value_counts()

No     500
Yes     82
Name: Self_Employed, dtype: int64

In [8]:
# Column dtypes and non-null counts after dropping Loan_ID.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             601 non-null    object 
 1   Married            611 non-null    object 
 2   Dependents         599 non-null    object 
 3   Education          614 non-null    object 
 4   Self_Employed      582 non-null    object 
 5   ApplicantIncome    614 non-null    int64  
 6   CoapplicantIncome  614 non-null    float64
 7   LoanAmount         592 non-null    float64
 8   Loan_Amount_Term   600 non-null    float64
 9   Credit_History     564 non-null    float64
 10  Property_Area      614 non-null    object 
 11  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(7)
memory usage: 57.7+ KB


In [9]:
# Number of missing values per column.
df.isna().sum()

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

## Filling Missing Values

In [10]:
# Impute missing Gender with the most frequent category.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# column accessor is a chained in-place update that is not guaranteed to
# modify df (it does not under pandas copy-on-write).
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])

In [11]:
# Impute missing Married with the most frequent category.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
df['Married'] = df['Married'].fillna(df['Married'].mode()[0])

In [12]:
# Impute missing Dependents with the most frequent category.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
df['Dependents'] = df['Dependents'].fillna(df['Dependents'].mode()[0])

In [13]:
# Impute missing Self_Employed with the most frequent category.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
df['Self_Employed'] = df['Self_Employed'].fillna(df['Self_Employed'].mode()[0])

In [14]:
# Impute missing LoanAmount with the most frequent value.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
# NOTE(review): LoanAmount is a float column; the median may be a more robust
# fill value than the mode. Mode is kept so downstream results are unchanged.
df['LoanAmount'] = df['LoanAmount'].fillna(df['LoanAmount'].mode()[0])

In [15]:
# Impute missing Loan_Amount_Term with the most frequent value.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
df['Loan_Amount_Term'] = df['Loan_Amount_Term'].fillna(df['Loan_Amount_Term'].mode()[0])

In [16]:
# Impute missing Credit_History with the most frequent value.
# Assign the result back instead of using a chained fillna(inplace=True),
# which is not guaranteed to modify df under pandas copy-on-write.
df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].mode()[0])

In [17]:
# Re-check missing values per column after imputation.
df.isna().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

## Converting String Values to Numerical Values

In [18]:
# Distribution of Dependents; values are still strings at this point.
df['Dependents'].value_counts()

0     360
1     102
2     101
3+     51
Name: Dependents, dtype: int64

In [19]:
# Collapse the open-ended '3+' category to 3 so the column can be cast to an
# integer dtype later.
# One boolean-mask .loc assignment replaces the row-by-row chained indexing
# (df['Dependents'][i] = 3), which raises SettingWithCopyWarning, is not
# guaranteed to write through to df, and assumes the index labels are 0..n-1.
df.loc[df['Dependents'] == '3+', 'Dependents'] = 3
df.Dependents.value_counts()

0    360
1    102
2    101
3     51
Name: Dependents, dtype: int64

In [20]:
# Category frequencies after imputation.
df['Self_Employed'].value_counts()

No     532
Yes     82
Name: Self_Employed, dtype: int64

In [21]:
# Single encoder instance; each fit_transform call on it re-fits the encoder,
# so only the mapping of the most recently encoded column is retained.
le = LabelEncoder()

In [22]:
# Encode Self_Employed as integers: No -> 0, Yes -> 1
encoded_self_employed = le.fit_transform(df['Self_Employed'])
df['Self_Employed'] = encoded_self_employed

In [23]:
# Encode the target column Loan_Status as integers.
encoded_loan_status = le.fit_transform(df['Loan_Status'])
df['Loan_Status'] = encoded_loan_status

In [24]:
# Class balance of the encoded target: N -> 0, Y -> 1
df.Loan_Status.value_counts()

1    422
0    192
Name: Loan_Status, dtype: int64

In [25]:
# Category frequencies for Education before encoding.
df.Education.value_counts()

Graduate        480
Not Graduate    134
Name: Education, dtype: int64

In [26]:
# Encode Education as integers: Graduate -> 0, Not Graduate -> 1
encoded_education = le.fit_transform(df['Education'])
df['Education'] = encoded_education

In [27]:
df.Gender = le.fit_transform(df.Gender)
df.Gender.value_counts()
# LabelEncoder numbers the labels in sorted order, so:
#Female = 0
#Male = 1

1    502
0    112
Name: Gender, dtype: int64

In [28]:
# Category frequencies for Married before encoding.
df['Married'].value_counts()

Yes    401
No     213
Name: Married, dtype: int64

In [29]:
# Encode Married as integers: No -> 0, Yes -> 1
df['Married'] = le.fit_transform(df['Married'])

In [30]:
# Cast Dependents to a 32-bit integer column now that every value is numeric.
df = df.assign(Dependents=df['Dependents'].astype('int32'))

In [31]:
# Category frequencies for Property_Area before encoding.
df['Property_Area'].value_counts()

Semiurban    233
Urban        202
Rural        179
Name: Property_Area, dtype: int64

In [32]:
# Encode Property_Area as integers: Rural -> 0, Semiurban -> 1, Urban -> 2
df['Property_Area'] = le.fit_transform(df['Property_Area'])

In [33]:
# Confirm the encoded Property_Area distribution.
df['Property_Area'].value_counts()

1    233
2    202
0    179
Name: Property_Area, dtype: int64

In [34]:
# Verify that every column is numeric and has no missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             614 non-null    int32  
 1   Married            614 non-null    int32  
 2   Dependents         614 non-null    int32  
 3   Education          614 non-null    int32  
 4   Self_Employed      614 non-null    int32  
 5   ApplicantIncome    614 non-null    int64  
 6   CoapplicantIncome  614 non-null    float64
 7   LoanAmount         614 non-null    float64
 8   Loan_Amount_Term   614 non-null    float64
 9   Credit_History     614 non-null    float64
 10  Property_Area      614 non-null    int32  
 11  Loan_Status        614 non-null    int32  
dtypes: float64(4), int32(7), int64(1)
memory usage: 40.9 KB


## Building Model

In [35]:
# Separate the target (Loan_Status) from the feature matrix.
y = df.Loan_Status
X = df.drop(columns=['Loan_Status'])

In [36]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with scikit-learn's default settings
# on the full feature matrix.
lr = LogisticRegression()
lr.fit(X, y)
# NOTE(review): the score is computed on the same X, y used for fitting, so
# this is training accuracy, not an estimate of performance on unseen data.
lr.score(X, y)

0.8094462540716613

In [37]:
# Feature order the model was fitted with; positional inputs passed to
# predict must list their values in this same order.
X.columns

Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area'],
      dtype='object')

In [38]:
# Predict for one hand-written applicant.
# The row is wrapped in a DataFrame carrying the training column names so the
# value-to-feature mapping is explicit and scikit-learn can validate feature
# names, instead of relying on the positional order of a bare nested list.
sample = pd.DataFrame(
    [[1, 0, 0, 0, 0, 5849, 0.0, 120.0, 360.0, 1.0, 2]],
    columns=X.columns,
)
lr.predict(sample)

array([1])

## Deploying Model

In [39]:
import pickle

In [40]:
# Serialize the fitted model to disk. The context manager closes the file
# handle (flushing the data) even if pickling raises.
with open('loan_model.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

In [41]:
# Reload the model that was saved as 'loan_model.pkl'. The context manager
# closes the file handle after reading.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('loan_model.pkl', 'rb') as model_file:
    md = pickle.load(model_file)

In [43]:
# Check that the reloaded model gives a prediction for the sample applicant.
# The row is wrapped in a DataFrame with the training column names so the
# value-to-feature mapping is explicit rather than positional.
sample = pd.DataFrame(
    [[1, 0, 0, 0, 0, 5849, 0, 120.0, 360.0, 1.0, 2]],
    columns=X.columns,
)
md.predict(sample)

array([1])

In [44]:
# Predict for a second hand-written applicant with the reloaded model.
# The row is wrapped in a DataFrame with the training column names so the
# value-to-feature mapping is explicit rather than positional.
sample = pd.DataFrame(
    [[1, 0, 0, 0, 0, 5000, 12, 12.00, 0, 1.0, 2]],
    columns=X.columns,
)
md.predict(sample)

array([1])