In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import joblib


In [None]:
# Location of the loan-prediction CSV; pandas downloads it when read_csv runs.
url = 'https://raw.githubusercontent.com/edyoda/data-science-complete-tutorial/master/Data/loan_prediction_data.csv'

# `data` keeps the frame exactly as it was read. Every later cell operates on
# `df`, which was never defined (NameError on a fresh kernel), so bind `df` to
# an independent copy that can be cleaned and re-encoded without altering the
# raw frame.
data = pd.read_csv(url)
df = data.copy()

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
# Summary statistics as produced by DataFrame.describe().
print(df.describe())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()

In [None]:

# Count of missing entries per column (isna is the pandas alias of isnull).
print(df.isna().sum())

In [None]:

# Histogram of every numeric column.
df.hist(figsize=(10, 8))
plt.show()

# Pairwise correlation heatmap. Only numeric columns are passed to corr():
# since pandas 2.0 DataFrame.corr() no longer drops non-numeric columns
# silently and raises when the frame still contains strings.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')
plt.show()

In [None]:

# Remove exact duplicate rows. A new frame is bound instead of mutating in
# place, so re-running the cell has no hidden side effects.
df = df.drop_duplicates()

# Numeric columns are filled with their column mean. numeric_only=True is
# required: on a frame that still holds string columns, df.mean() raises in
# pandas >= 2.0 instead of skipping them.
fill_values = df.mean(numeric_only=True).to_dict()

# Every remaining (non-numeric) column is filled with its most frequent value;
# the mean-based fill above never touched these columns.
# NOTE(review): this also covers the label column if it has gaps -- confirm
# whether rows with a missing label should be dropped instead.
for col in df.columns.difference(list(fill_values)):
    modes = df[col].mode()
    if not modes.empty:  # an all-NaN column has no mode; leave it untouched
        fill_values[col] = modes.iloc[0]

df = df.fillna(fill_values)


In [None]:

# Name of the label column. It has to survive this cell unchanged because the
# following cell selects it with df['Loan_Status'].
target_col = 'Loan_Status'

# Split the label off before one-hot encoding: running get_dummies over the
# whole frame turns a string-valued label into a renamed dummy column
# (e.g. 'Loan_Status_Y'), after which 'Loan_Status' no longer exists.
target = df[target_col]
features = df.drop(columns=[target_col])

# A non-numeric label is mapped to integer codes (LabelEncoder numbers the
# classes in sorted order). The precision/recall/F1 calls further down use
# their default pos_label=1 and therefore need numeric 0/1 labels.
if not pd.api.types.is_numeric_dtype(target):
    target = pd.Series(
        LabelEncoder().fit_transform(target),
        index=target.index,
        name=target_col,
    )

# Integer-encode Gender exactly as before.
features['Gender'] = LabelEncoder().fit_transform(features['Gender'])

# A non-numeric column holding a distinct value on every row (presumably a
# record identifier -- confirm against the data) would be expanded into one
# dummy column per row and gives a model nothing to generalise from; drop it.
id_like_cols = [
    col
    for col in features.select_dtypes(exclude='number').columns
    if len(features) > 1 and features[col].nunique(dropna=False) == len(features)
]
features = features.drop(columns=id_like_cols)

# One-hot encode the remaining categorical features. dtype=int keeps the
# dummies as plain integers (pandas >= 2.0 would otherwise emit booleans).
features = pd.get_dummies(features, drop_first=True, dtype=int)

# Re-attach the label under its original name.
df = features.assign(**{target_col: target})

In [None]:

# Label vector: the 'Loan_Status' column.
y = df['Loan_Status']
# Feature matrix: every other column of the frame.
X = df.drop(columns=['Loan_Status'])

In [None]:

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Logistic Regression: fit on the training split, predict the held-out split.
logistic_model = LogisticRegression().fit(X_train, y_train)
y_pred_log = logistic_model.predict(X_test)

# Print each metric for the test-set predictions, one line per metric.
for label, metric in (
    ("Logistic Regression - Accuracy:", accuracy_score),
    ("Logistic Regression - Precision:", precision_score),
    ("Logistic Regression - Recall:", recall_score),
    ("Logistic Regression - F1 Score:", f1_score),
):
    print(label, metric(y_test, y_pred_log))

In [None]:
# Decision Tree Classifier
# random_state pins the estimator's internal randomness; without it the fitted
# tree, and every metric printed below, can differ from one run to the next.
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# Test-set metrics for the fitted tree.
print("Decision Tree - Accuracy:", accuracy_score(y_test, y_pred_tree))
print("Decision Tree - Precision:", precision_score(y_test, y_pred_tree))
print("Decision Tree - Recall:", recall_score(y_test, y_pred_tree))
print("Decision Tree - F1 Score:", f1_score(y_test, y_pred_tree))

In [None]:

# Random Forest Classifier
# random_state pins the bootstrap sampling and feature sub-sampling; without it
# the forest, and every metric printed below, changes on each run.
forest_model = RandomForestClassifier(random_state=42)
forest_model.fit(X_train, y_train)
y_pred_forest = forest_model.predict(X_test)

# Test-set metrics for the fitted forest.
print("Random Forest - Accuracy:", accuracy_score(y_test, y_pred_forest))
print("Random Forest - Precision:", precision_score(y_test, y_pred_forest))
print("Random Forest - Recall:", recall_score(y_test, y_pred_forest))
print("Random Forest - F1 Score:", f1_score(y_test, y_pred_forest))


In [None]:

# Hyperparameter tuning for Random Forest Classifier
# Candidate values tried for the number of trees and the maximum tree depth.
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7]}

# 5-fold cross-validated search over the grid. The forest is seeded so that the
# comparison between candidates, and therefore the selected parameters, is the
# same on every run instead of depending on the estimator's random draws.
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best parameters for Random Forest Classifier:", grid.best_params_)


In [None]:

# Saving the best model using joblib
joblib.dump(grid.best_estimator_, 'best_loan_model.pkl')

# The estimator was fitted on StandardScaler-transformed features, so new data
# has to pass through the same fitted scaler before predict(). Persist the
# scaler next to the model; the model file itself is written exactly as before.
joblib.dump(scaler, 'loan_scaler.pkl')
