<a href="https://colab.research.google.com/github/vndunaga/Predicting-a-Loan-Default/blob/main/Loan_Default_Flask_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount the user's Google Drive inside the Colab VM so the dataset under
# /content/drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Load the gzip-compressed loan dataset from the mounted Drive into `df`.
# low_memory=False makes pandas read each column in one pass when inferring
# its dtype instead of chunk by chunk (avoids mixed-type columns).
df = pd.read_csv("/content/drive/MyDrive/Loan/accepted_2007_to_2018Q4 (1).csv.gz", compression = "gzip", low_memory=False)


In [None]:

# Build a small test file ("test.csv") that holds at least one loan for every
# distinct loan_status value, topped up with random rows to 100 rows in total.

# Step 1: Get unique loan statuses
unique_loan_statuses = df['loan_status'].unique()

# Step 2: Draw one random row per status. The one-row frames are collected in
# a list and concatenated once at the end: DataFrame.append was removed in
# pandas 2.0, and appending inside a loop re-copies the frame on every pass.
per_status_samples = []
for status in unique_loan_statuses:
    data_with_status = df[df['loan_status'] == status]

    # A missing status (NaN) never compares equal to itself, so it selects
    # no rows; skip it instead of sampling from an empty frame.
    if len(data_with_status) > 0:
        per_status_samples.append(data_with_status.sample(n=1))

# Step 3: Top up to 100 rows with samples drawn from the whole dataset,
# regardless of loan status (these may repeat a row picked in step 2).
# The count is clamped so sample() never sees a negative n or an n larger
# than the dataset, both of which raise ValueError.
remaining_samples = min(max(100 - len(per_status_samples), 0), len(df))
additional_samples = df.sample(n=remaining_samples)

# Combine everything and renumber the rows from 0.
selected_data = pd.concat(
    [*per_status_samples, additional_samples],
    ignore_index=True,
)

# Save the selected rows to disk.
selected_data.to_csv("test.csv")

In [None]:

# Build a second test file ("testss.csv") made up solely of loans whose
# loan_status is 'Default'.
defaulters_data = df[df['loan_status'] == 'Default']

# Cap the sample at 25 rows; when fewer exist, keep every one of them.
if len(defaulters_data) < 25:
    selected_defaulters = defaulters_data
else:
    # Fixed seed so the same 25 rows are drawn on every run.
    selected_defaulters = defaulters_data.sample(n=25, random_state=42)

# Renumber the rows from 0, discarding the original row labels.
selected_defaulters.reset_index(drop=True, inplace=True)

# Write the sample to disk.
selected_defaulters.to_csv("testss.csv")


In [None]:
# Ask the Colab front end for the proxy address of port 5000 on this VM and
# print it; 5000 is the port Flask's app.run() listens on by default.
from google.colab.output import eval_js
print(eval_js("google.colab.kernel.proxyPort(5000)"))

In [None]:
from flask import Flask, render_template, request
import pandas as pd
import numpy as np
import joblib
import requests
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder


class CustomLabelEncoder(BaseEstimator, TransformerMixin):
    """Label-encode a fixed set of columns, one ``LabelEncoder`` per column.

    ``columns`` names the columns to encode. ``encoders`` maps each of those
    names to the ``LabelEncoder`` fitted on it and is filled in by ``fit``.
    ``X`` is indexed by column name and copied with ``.copy()``, so it is
    presumably a pandas DataFrame.
    """

    def __init__(self, columns):
        self.columns = columns
        self.encoders = {}

    def fit(self, X, y=None):
        """Fit a fresh encoder on every configured column of ``X``.

        ``y`` is accepted for scikit-learn API compatibility and ignored.
        Returns ``self``.
        """
        for name in self.columns:
            # LabelEncoder.fit returns the encoder itself, so it can be
            # stored directly.
            self.encoders[name] = LabelEncoder().fit(X[name])
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns label-encoded."""
        return self._convert(X, "transform")

    def inverse_transform(self, X):
        """Return a copy of ``X`` with the configured columns decoded back."""
        return self._convert(X, "inverse_transform")

    def _convert(self, X, method_name):
        """Apply ``method_name`` of each column's encoder to a copy of ``X``."""
        converted = X.copy()
        for name in self.columns:
            convert_column = getattr(self.encoders[name], method_name)
            # Read from the untouched input, write into the copy.
            converted[name] = convert_column(X[name])
        return converted


# Flask application; HTML templates are looked up in /content/templates.
app = Flask(__name__, template_folder='/content/templates')





# Columns taken from the uploaded CSV and passed, in this order, through the
# label-encoder transformer and then to both models in check_transaction().
selected_columns = [
    'loan_amnt', 'term', 'int_rate', 'grade', 'sub_grade', 'home_ownership',
    'annual_inc', 'verification_status', 'purpose', 'dti',
    'inq_last_6mths', 'pub_rec', 'revol_util', 'total_acc', 'open_acc',
    'hardship_flag', 'mort_acc', 'open_acc_6m', 'acc_now_delinq',
    'pub_rec_bankruptcies', 'tax_liens'
]

# A subset of selected_columns. NOTE(review): not referenced anywhere else in
# the visible code; presumably the column list the saved CustomLabelEncoder
# was fitted with -- confirm before removing.
columns=["term", "grade", "sub_grade", "home_ownership", "verification_status", "purpose", "hardship_flag"]



@app.route('/')
def index():
    """Render the index.html template for the site root."""
    return render_template('index.html')

def _combine_predictions(first_preds, second_preds):
    """Turn two per-row prediction sequences into one verdict label per row.

    A row is "Defaulter" when both models predict 0, "Non-Defaulter" when
    both predict 1, and "Suspecious" in every other case (the models
    disagree, or either one produced some other value).
    """
    verdicts = []
    for first, second in zip(first_preds, second_preds):
        if first == 0 and second == 0:
            verdicts.append("Defaulter")
        elif first == 1 and second == 1:
            verdicts.append("Non-Defaulter")
        else:
            # NOTE(review): "Suspecious" is misspelled, but it is kept
            # byte-for-byte because the template or other consumers may
            # match on the exact text. Fix it together with them.
            verdicts.append("Suspecious")
    return verdicts


@app.route('/check_transaction', methods=['POST'])
def check_transaction():
    """Score an uploaded CSV of loans with both models and show the verdicts.

    Expects a multipart POST with a CSV file in the ``transaction_file``
    field. The CSV must contain an ``id`` column plus every column listed in
    ``selected_columns``.

    Returns:
        The rendered ``index.html`` template with ``prediction`` set to an
        HTML table of ``id`` / ``Result`` rows, or a plain-text message with
        HTTP status 400 when required columns are missing from the upload.
    """
    loan_df = pd.read_csv(request.files['transaction_file'])

    # Fail early with a readable 400 instead of a KeyError (HTTP 500); for
    # "id" that KeyError would otherwise only surface after both models ran.
    required_columns = ['id', *selected_columns]
    missing_columns = [
        name for name in required_columns if name not in loan_df.columns
    ]
    if missing_columns:
        message = (
            "Uploaded CSV is missing required columns: "
            + ", ".join(missing_columns)
        )
        return message, 400

    loan_df_filtered = loan_df[selected_columns].copy()

    # Load the saved label-encoder transformer (presumably a fitted
    # CustomLabelEncoder -- confirm against the training notebook).
    with open('/content/label_encoder_transformer.joblib', 'rb') as f:
        transformer = joblib.load(f)

    # Replace missing values with 0 before encoding the categorical columns.
    loan_df_filtered = loan_df_filtered.fillna(0)
    loan_df_filtered = transformer.transform(loan_df_filtered)

    # Load both models; the relative paths resolve against the process's
    # current working directory.
    rf_model = joblib.load("rf_model.pkl")
    mlp_model = joblib.load("mlp_model.pkl")

    print("loan_df_filtered columns:", loan_df_filtered.columns)

    # Each model yields one prediction per uploaded row.
    pred1 = rf_model.predict(loan_df_filtered)
    print(pred1)
    pred2 = mlp_model.predict(loan_df_filtered)
    print(pred2)

    results = _combine_predictions(pred1, pred2)

    # The verdicts are already in the same row order as the upload, so pair
    # them with the ids directly. Merging them back onto loan_df by "id"
    # would emit one row per matching pair and so multiply rows whenever an
    # id is repeated.
    result_df = pd.DataFrame({"id": loan_df["id"], "Result": results})

    # Render the verdicts as an HTML table for the template.
    result_table = result_df.to_html(index=False, classes="table table-striped")
    return render_template('index.html', prediction=result_table)


# Start Flask's built-in development server when this file is run directly
# (app.run() with no arguments uses Flask's default host and port).
if __name__ == "__main__":
   app.run()