In [2]:
# Mount Google Drive at /content/drive so the CSV read further down
# ('/content/drive/My Drive/loan_data.csv') is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings( "ignore" )

class LR():
    """Single-layer binary classifier trained with batch gradient descent.

    The model computes ``activation(X @ weights + bias)`` and thresholds the
    result at 0.5 to produce class labels 0/1.

    Parameters
    ----------
    act : str or None
        Name of the output activation: ``'relu'``, ``'prelu'``, ``'softmax'``;
        any other value (including ``None``) selects the sigmoid.
    lR : float, default 0.01
        Learning rate (step size) of each gradient-descent update.
    n_iters : int, default 1000
        Number of gradient-descent updates performed by :meth:`fit`.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,)
        Set by :meth:`fit`.
    bias : float
        Set by :meth:`fit`.
    """

    def __init__(self, act, lR=0.01, n_iters=1000):
        self.lR = lR
        self.n_iters = n_iters
        self.act = act if act is not None else 'sigmoid'

    def activator(self, x):
        """Apply the configured activation element-wise to ``x``.

        Parameters
        ----------
        x : array-like
            Linear scores.

        Returns
        -------
        numpy.ndarray
            Activated values with the same shape as ``x``.
        """
        x = np.asarray(x)
        if self.act == 'relu':
            # np.where (rather than np.maximum) so NaN maps to 0, as before.
            return np.where(x > 0, x, 0)
        if self.act == 'prelu':
            # NOTE(review): this scales positive inputs by the learning rate
            # and zeroes the rest, which is not the usual PReLU definition
            # (x if x > 0 else alpha * x). Behaviour kept as-is; confirm intent.
            return np.where(x > 0, x * self.lR, 0)
        if self.act == 'softmax':
            # Subtract the max for numerical stability.
            # NOTE(review): the normalisation runs over every entry of x, i.e.
            # across all samples when x is the per-sample score vector, so the
            # outputs sum to 1 over the whole batch.
            exp_x = np.exp(x - np.max(x))
            return exp_x / np.sum(exp_x)
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Runs ``n_iters`` full-batch updates using the gradient
        ``(2 / n_samples) * X.T @ (predictions - y)`` for the weights and the
        matching sum for the bias. Prints the weight and input shapes once.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target values (0/1 labels in this notebook).

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one value per row of ``X``.
        """
        X = np.asarray(X)
        # Flatten so a column-shaped target cannot broadcast the error term
        # into an (n_samples, n_samples) matrix.
        y = np.asarray(y).reshape(-1)
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}"
            )

        self.weights = np.zeros(n_features)
        print(self.weights.shape, X.shape, sep=' ')
        self.bias = 0

        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = self.activator(linear_pred)
            error = predictions - y

            dw = (2 / n_samples) * np.dot(X.T, error)
            db = (2 / n_samples) * np.sum(error)

            # The parameter update must happen inside the loop; otherwise
            # every iteration recomputes the same gradient and only a single
            # step is ever applied.
            self.weights = self.weights - self.lR * dw
            self.bias = self.bias - self.lR * db

    def predict(self, X):
        """Return hard class labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list of int
            ``0`` where the activated score is ``<= 0.5``, otherwise ``1``.
        """
        linear_pred = np.dot(np.asarray(X), self.weights) + self.bias
        y_pred = self.activator(linear_pred)
        return np.where(y_pred <= 0.5, 0, 1).tolist()

# Load the loan data from the mounted Drive, keeping numeric columns only.
df = pd.read_csv('/content/drive/My Drive/loan_data.csv').select_dtypes(include=[np.number])

print(df.shape)
print(df.columns)
print(df['loan_status'].unique())

# person_age is not used as a feature.
df = df.drop(columns='person_age')

print(df.shape)
print(df.columns)

# Every remaining numeric column except the target.
feature_columns = [
    'person_income',
    'person_emp_exp',
    'loan_amnt',
    'loan_int_rate',
    'loan_percent_income',
    'cb_person_cred_hist_length',
    'credit_score',
]
X = df[feature_columns]
Y = df['loan_status']

# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=12345678
)

# Hand-written model.
model = LR('softmax', lR=0.01, n_iters=1000)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# scikit-learn reference model.
model1 = LogisticRegression()
model1.fit(X_train, Y_train)
Y_pred1 = model1.predict(X_test)

def accuracy(y_pred, y_test):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays first, so plain Python lists
    are compared element-wise (``list == list`` would otherwise yield a
    single bool and a meaningless score).

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_test : array-like
        Ground-truth labels, same shape as ``y_pred``.

    Returns
    -------
    numpy.float64
        Share of matching positions; ``nan`` when the inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_test)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"shape mismatch: y_pred {predicted.shape} vs y_test {actual.shape}"
        )
    return np.sum(predicted == actual) / actual.size

# Score both models on the held-out split, then report them side by side.
acc, acc1 = accuracy(Y_pred, Y_test), accuracy(Y_pred1, Y_test)
print("Our accuracy ", acc)
print("Sklearn accuracy ",acc1)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(45000, 9)
Index(['person_age', 'person_income', 'person_emp_exp', 'loan_amnt',
       'loan_int_rate', 'loan_percent_income', 'cb_person_cred_hist_length',
       'credit_score', 'loan_status'],
      dtype='object')
[1 0]
(45000, 8)
Index(['person_income', 'person_emp_exp', 'loan_amnt', 'loan_int_rate',
       'loan_percent_income', 'cb_person_cred_hist_length', 'credit_score',
       'loan_status'],
      dtype='object')
(7,) (31500, 7)
Our accuracy  0.7797037037037037
Sklearn accuracy  0.8196296296296296


We can see that there are 14 different columns, with the last column, `loan_status`, being the target variable. The other columns contain various factors that may help determine whether a loan will be allocated or not. Of these columns, age and gender may be dropped, as they are unlikely to be clear indicators. We also drop all the text columns, because logistic regression cannot use non-numerical values directly.