In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
import joblib
import numpy as np
from tkinter import *

In [5]:
# Load Dataset
data = pd.read_csv("C:/Users/user/Downloads/loan_prediction.csv")
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [6]:
# Data Preprocessing
data = data.drop('Loan_ID', axis=1) #loan id does not correlate the data
data = data.dropna(subset=['Gender', 'Dependents', 'LoanAmount', 'Loan_Amount_Term']) #these columns have less than 5% null val
data['Self_Employed'] = data['Self_Employed'].fillna(data['Self_Employed'].mode()[0]) #assingning null vals with mode val
data['Credit_History'] = data['Credit_History'].fillna(data['Credit_History'].mode()[0]) 
data['Dependents'] = data['Dependents'].replace(to_replace='3+', value='4') #assigning 4 as val for 3+ dependents
data['Gender'] = data['Gender'].map({'Male': 1, 'Female': 0}).astype('int')
data['Married'] = data['Married'].map({'Yes': 1, 'No': 0}).astype('int')
data['Education'] = data['Education'].map({'Graduate': 1, 'Not Graduate': 0}).astype('int')
data['Self_Employed'] = data['Self_Employed'].map({'Yes': 1, 'No': 0}).astype('int')
data['Property_Area'] = data['Property_Area'].map({'Rural': 0, 'Semiurban': 2, 'Urban': 1}).astype('int')
data['Loan_Status'] = data['Loan_Status'].map({'Y': 1, 'N': 0}).astype('int')


In [7]:
# Feature Matrix and Target Vector
X = data.drop('Loan_Status', axis=1)
y = data['Loan_Status']

In [8]:
# Feature Scaling
cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
st = StandardScaler()
X[cols] = st.fit_transform(X[cols])

In [9]:
# Model Evaluation Function
model_df = {}
def model_val(model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{model} accuracy is {accuracy_score(y_test, y_pred)}")
    score = cross_val_score(model, X, y, cv=5) #5 fold crossval 
    print(f"{model} Avg cross val score is {np.mean(score)}")
    model_df[str(model)] = round(np.mean(score) * 100, 2) #we print avg of 5 iterations of crossval folds

In [10]:
# Model Training
models = [LogisticRegression(), svm.SVC(), DecisionTreeClassifier(), RandomForestClassifier(), GradientBoostingClassifier()]
for model in models:
    model_val(model, X, y)

LogisticRegression() accuracy is 0.8018018018018018
LogisticRegression() Avg cross val score is 0.8047829647829647
SVC() accuracy is 0.7927927927927928
SVC() Avg cross val score is 0.7938902538902539
DecisionTreeClassifier() accuracy is 0.7837837837837838
DecisionTreeClassifier() Avg cross val score is 0.708927108927109
RandomForestClassifier() accuracy is 0.7657657657657657
RandomForestClassifier() Avg cross val score is 0.7920720720720722
GradientBoostingClassifier() accuracy is 0.7927927927927928
GradientBoostingClassifier() Avg cross val score is 0.7667485667485667


In [11]:
# Hyperparameter Tuning
# Logistic Regression
log_reg_grid = {"C": np.logspace(-4, 4, 20), "solver": ['liblinear']}
rs_log_reg = RandomizedSearchCV(LogisticRegression(), param_distributions=log_reg_grid, n_iter=20, cv=5, verbose=True)
rs_log_reg.fit(X, y)
print(rs_log_reg.best_score_)
print(rs_log_reg.best_params_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
0.8047829647829647
{'solver': 'liblinear', 'C': 0.23357214690901212}


In [12]:
svc_grid = {'C': [0.25, 0.50, 0.75, 1], "kernel": ["linear"]}
gs_svc = GridSearchCV(svm.SVC(), param_grid=svc_grid, cv=5, verbose=True)
gs_svc.fit(X, y)
print(gs_svc.best_score_)
print(gs_svc.best_params_)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
0.8066011466011467
{'C': 0.25, 'kernel': 'linear'}


In [13]:
rf_grid = {
    'n_estimators': np.arange(10, 1000, 10),
    'max_features': ['sqrt'],
    'max_depth': [None, 3, 5, 10, 20, 30],
    'min_samples_split': [2, 5, 20, 50, 100],
    'min_samples_leaf': [1, 2, 5, 10]
}
rs_rf = RandomizedSearchCV(RandomForestClassifier(), param_distributions=rf_grid, cv=5, n_iter=20, verbose=True)
rs_rf.fit(X, y)
print(rs_rf.best_score_)
print(rs_rf.best_params_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
0.8066011466011467
{'n_estimators': 280, 'min_samples_split': 50, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 20}


In [3]:
# Load and Predict
import joblib
import pandas as pd
model = joblib.load('loan_status_predict')
df = pd.DataFrame({
    'Gender': [1],
    'Married': [1],
    'Dependents': [2],
    'Education': [0],
    'Self_Employed': [0],
    'ApplicantIncome': [2889],
    'CoapplicantIncome': [0.0],
    'LoanAmount': [45],
    'Loan_Amount_Term': [180],
    'Credit_History': [0],
    'Property_Area': [1]
})
result = model.predict(df)
if result == 1:
    print("Loan Approved")
else:
    print("Loan Not Approved")


Loan Not Approved


In [26]:
import pandas as pd
from tkinter import *
from tkinter import messagebox
import joblib

# Load the trained model
model = joblib.load('loan_status_predict')

def validate_inputs():
    try:
        p1 = int(gender_var.get())
        p2 = int(married_var.get())
        p3 = int(dependents_var.get())
        p4 = int(education_var.get())
        p5 = int(self_employed_var.get())
        p6 = float(applicant_income_var.get())
        p7 = float(coapplicant_income_var.get())
        p8 = float(loan_amount_var.get())
        p9 = float(loan_amount_term_var.get())
        p10 = int(credit_history_var.get())
        p11 = int(property_area_var.get())
        
        return {
            'Gender': p1,
            'Married': p2,
            'Dependents': p3,
            'Education': p4,
            'Self_Employed': p5,
            'ApplicantIncome': p6,
            'CoapplicantIncome': p7,
            'LoanAmount': p8,
            'Loan_Amount_Term': p9,
            'Credit_History': p10,
            'Property_Area': p11
        }
    except ValueError:
        messagebox.showerror("Input Error", "Please enter valid inputs")
        return None

def predict_loan_status():
    inputs = validate_inputs()
    if inputs:
        df = pd.DataFrame(inputs, index=[0])
        result = model.predict(df)
        if result == 1:
            messagebox.showinfo("Prediction Result", "Loan Approved")
        else:
            messagebox.showinfo("Prediction Result", "Loan Not Approved")

# Create the main application window
master = Tk()
master.title("Loan Status Prediction Using Machine Learning")
master.geometry("700x800")
master.configure(bg="lightblue")

# Title Label
title_label = Label(master, text="Loan Status Prediction", font=("Helvetica", 16, "bold"), bg="black", fg="white")
title_label.pack(pady=10)

# Frame for input fields
frame = Frame(master, bg="lightblue")
frame.pack(pady=20, padx=20)

# Define input variables
gender_var = StringVar()
married_var = StringVar()
dependents_var = StringVar()
education_var = StringVar()
self_employed_var = StringVar()
applicant_income_var = StringVar()
coapplicant_income_var = StringVar()
loan_amount_var = StringVar()
loan_amount_term_var = StringVar()
credit_history_var = StringVar()
property_area_var = StringVar()

# Create input fields
fields = [
    ("Gender [1:Male ,0:Female]", gender_var),
    ("Married [1:Yes,0:No]", married_var),
    ("Dependents [0,1,2,3+]", dependents_var),
    ("Education [1:Graduate, 0:Not Graduate]", education_var),
    ("Self_Employed [1:Yes, 0:No]", self_employed_var),
    ("ApplicantIncome", applicant_income_var),
    ("CoapplicantIncome", coapplicant_income_var),
    ("LoanAmount", loan_amount_var),
    ("Loan_Amount_Term", loan_amount_term_var),
    ("Credit_History [1:Yes, 0:No]", credit_history_var),
    ("Property_Area [0:Rural, 1:Urban, 2:Semiurban]", property_area_var)
]

for label_text, var in fields:
    label = Label(frame, text=label_text, bg="lightblue", anchor="w")
    label.pack(fill=X, pady=5)
    entry = Entry(frame, textvariable=var)
    entry.pack(fill=X, pady=5)

# Prediction Button
predict_button = Button(master, text="Predict", command=predict_loan_status, bg="blue", fg="white", font=("Helvetica", 12, "bold"))
predict_button.pack(pady=5)

mainloop()
