# Loan Approval Prediction


## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the raw loan-application records from the CSV that sits next to the notebook.
dataset = pd.read_csv(filepath_or_buffer='loan_approval_dataset.csv')

In [3]:
# Summarize column dtypes and non-null counts for the freshly loaded frame.
dataset.info()

# Only a cell's last expression is displayed, so a bare `isnull().sum()` in the
# middle of the cell is computed and thrown away. Make the missing-value check
# explicit so it cannot be silently skipped.
assert not dataset.isnull().values.any(), "dataset contains missing values"

# loan_id is presumably just a row identifier with no predictive value, so it
# is removed before modelling. Reassigning (instead of inplace=True) and
# tolerating an already-removed column keeps this cell safe to re-run.
dataset = dataset.drop(columns="loan_id", errors="ignore")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   loan_id                    4269 non-null   int64 
 1    no_of_dependents          4269 non-null   int64 
 2    education                 4269 non-null   object
 3    self_employed             4269 non-null   object
 4    income_annum              4269 non-null   int64 
 5    loan_amount               4269 non-null   int64 
 6    loan_term                 4269 non-null   int64 
 7    cibil_score               4269 non-null   int64 
 8    residential_assets_value  4269 non-null   int64 
 9    commercial_assets_value   4269 non-null   int64 
 10   luxury_assets_value       4269 non-null   int64 
 11   bank_asset_value          4269 non-null   int64 
 12   loan_status               4269 non-null   object
dtypes: int64(10), object(3)
memory usage: 433.7+ KB


In [4]:
# Column names in this CSV carry a leading space (visible in the printed
# index), so the names used below keep that space.
print(dataset.columns)

# Encode each two-valued text column as integer codes 0/1. Codes follow the
# sorted order of the labels, so the label that sorts last becomes 1. This is
# the same indicator that get_dummies(..., drop_first=True) yields, but as a
# plain integer array rather than a one-column boolean/uint8 DataFrame.
# NOTE(review): for ' loan_status' this presumably means 1 == "Rejected"
# (it sorts after "Approved") -- confirm against the raw labels before
# interpreting the confusion matrix.
for binary_column in (' self_employed', ' education', ' loan_status'):
    codes, labels = pd.factorize(dataset[binary_column], sort=True)
    if len(labels) > 2:
        # A single 0/1 column cannot represent more than two categories.
        raise ValueError(
            f"{binary_column!r} has {len(labels)} distinct values; expected a binary column"
        )
    dataset[binary_column] = codes

Index([' no_of_dependents', ' education', ' self_employed', ' income_annum',
       ' loan_amount', ' loan_term', ' cibil_score',
       ' residential_assets_value', ' commercial_assets_value',
       ' luxury_assets_value', ' bank_asset_value', ' loan_status'],
      dtype='object')


In [5]:
from sklearn.preprocessing import LabelEncoder

# The three columns were already reduced to two-valued indicators by the
# preceding cell; this pass re-maps them to integer class codes.
# A single encoder is refit per column, so after the loop `le` only holds the
# mapping learned from ' loan_status'.
le = LabelEncoder()
for column in (' education', ' self_employed', ' loan_status'):
    dataset[column] = le.fit_transform(dataset[column])

In [6]:
# Every column except the last is a feature; the last column (' loan_status')
# is the prediction target.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

# Show the encoded frame as a sanity check.
dataset

Unnamed: 0,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,2,0,0,9600000,29900000,12,778,2400000,17600000,22700000,8000000,0
1,0,1,1,4100000,12200000,8,417,2700000,2200000,8800000,3300000,1
2,3,0,0,9100000,29700000,20,506,7100000,4500000,33300000,12800000,1
3,3,0,0,8200000,30700000,8,467,18200000,3300000,23300000,7900000,1
4,5,1,1,9800000,24200000,20,382,12400000,8200000,29400000,5000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...
4264,5,0,1,1000000,2300000,12,317,2800000,500000,3300000,800000,1
4265,0,1,1,3300000,11300000,20,559,4200000,2900000,11000000,1900000,0
4266,2,1,0,6500000,23900000,18,457,1200000,12400000,18100000,7300000,1
4267,1,1,0,4100000,12800000,8,780,8200000,700000,14100000,5800000,0


## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [8]:
# Preview the training feature matrix produced by the split.
print(X_train)

[[       0        0        0 ...  6900000 14500000  9300000]
 [       5        1        1 ... 13600000 29100000  4900000]
 [       2        0        0 ...  3800000 28400000 13900000]
 ...
 [       3        1        1 ... 10300000 25300000  7400000]
 [       4        1        0 ...  2500000 21000000  6600000]
 [       1        1        1 ...  7600000 16000000  3200000]]


In [9]:
# Preview the training labels.
print(y_train)

[1 0 1 ... 0 0 0]


In [10]:
# Preview the held-out test feature matrix.
print(X_test)

[[       1        0        1 ...  5300000 28700000  4200000]
 [       2        0        1 ... 12200000 17800000 10800000]
 [       1        0        0 ...  3500000 18700000  8300000]
 ...
 [       4        1        0 ...  2600000  7100000  4100000]
 [       5        1        1 ... 12500000 37000000 12900000]
 [       5        0        1 ...  4300000 11000000  4600000]]


In [11]:
# Preview the held-out test labels.
print(y_test)

[0 0 0 ... 0 1 0]


## Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling statistics from the training rows only, then
# apply that same transformation to both splits so no test-set information
# leaks into the scaler.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [13]:
# Preview the training features after standardization.
print(X_train)

[[-1.48260495 -0.99284037 -1.00093765 ...  0.43005882 -0.06389485
   1.32015891]
 [ 1.47024375  1.00721126  0.99906323 ...  1.95775989  1.5454289
  -0.0305738 ]
 [-0.30146547 -0.99284037 -1.00093765 ... -0.27678795  1.46826955
   2.73228855]
 ...
 [ 0.28910427  1.00721126  0.99906323 ...  1.20531011  1.12656382
   0.73688797]
 [ 0.87967401  1.00721126 -1.00093765 ... -0.57320756  0.6525849
   0.4913002 ]
 [-0.89203521  1.00721126  0.99906323 ...  0.58966938  0.10144663
  -0.5524478 ]]


In [14]:
# Preview the test features after applying the scaler fitted on the training set.
print(X_test)

[[-0.89203521 -0.99284037  0.99906323 ...  0.06523468  1.50133784
  -0.24546309]
 [-0.30146547 -0.99284037  0.99906323 ...  1.63853877  0.29985641
   1.78063596]
 [-0.89203521 -0.99284037 -1.00093765 ... -0.34519247  0.3990613
   1.0131742 ]
 ...
 [ 0.87967401  1.00721126 -1.00093765 ... -0.55040605 -0.87957949
  -0.27616156]
 [ 1.47024375  1.00721126  0.99906323 ...  1.7069433   2.41622737
   2.42530384]
 [ 1.47024375 -0.99284037  0.99906323 ... -0.1627804  -0.44969164
  -0.12266921]]


## Training the SVM model on the Training set

In [15]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier trained on the standardized features.
# NOTE(review): random_state is believed to matter only when probability
# estimates are enabled -- confirm against the scikit-learn SVC docs.
classifier = SVC(
    kernel='linear',
    random_state=0,
)
classifier.fit(X_train, y_train)

## Making the Confusion Matrix

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Score the classifier on the held-out rows.
y_pred = classifier.predict(X_test)

# Tabulate true vs. predicted labels, then report the overall fraction of
# correct predictions as the cell's displayed value.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

[[624  35]
 [ 31 378]]


0.9382022471910112