In [2]:
!pip install pandas openpyxl

import pandas as pd
import io
import requests


# NOTE(review): train_test_split, StandardScaler, LogisticRegression,
# DecisionTreeClassifier, RandomForestClassifier and roc_auc_score are used
# below but not imported in this cell -- presumably an earlier cell imports
# them from scikit-learn; confirm before running this cell on its own.

# Dropbox share link to the loan data workbook (.xlsx).
file_path = "https://www.dropbox.com/scl/fi/113aenklgr6x0oru9l58k/Task-3-and-4_Loan_Data.xlsx?rlkey=c864pxz4wzz28ybaswrqmrunj&dl=1"

# Download the workbook into memory. Without a timeout, requests waits
# indefinitely on a stalled connection, so bound the wait explicitly.
response = requests.get(file_path, timeout=30)
response.raise_for_status()  # fail loudly on HTTP 4xx/5xx instead of parsing an error page

loan_data = pd.read_excel(io.BytesIO(response.content))

# customer_id is removed before modelling; it is not used as a feature.
loan_data_cleaned = loan_data.drop(columns=['customer_id'])

# Features are every remaining column except the 'default' target.
X = loan_data_cleaned.drop(columns=['default'])
y = loan_data_cleaned['default']

# 80/20 split, stratified on the target so both splits keep its class mix.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Fit the scaler on the training split only, then reuse its statistics for
# the test split (and later for single-borrower inputs).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate classifiers. The decision tree is seeded like the random forest
# so that the recorded AUC scores are reproducible between runs.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42)
}

# Fit each candidate and record its ROC AUC on the held-out test split.
model_performance = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    # Column 1 of predict_proba is the probability of the second class in
    # model.classes_ (default == 1 if the labels are 0/1 -- TODO confirm).
    y_pred_prob = model.predict_proba(X_test_scaled)[:, 1]
    auc_score = roc_auc_score(y_test, y_pred_prob)
    model_performance[name] = auc_score

# The logistic regression is selected explicitly, independent of the AUC
# ranking stored in model_performance, which is kept for inspection only.
best_model = models["Logistic Regression"]


def expected_loss(
    credit_lines_outstanding,
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    *,
    recovery_rate=0.10,
):
    """
    Estimate the probability of default (PD) and compute the expected loss on a loan.

    The borrower's features are standardised with the module-level ``scaler``
    and scored with the module-level ``best_model``. The expected loss is
    ``PD * (1 - recovery_rate) * loan_amt_outstanding``.

    Parameters:
    - credit_lines_outstanding (int): Number of credit lines
    - loan_amt_outstanding (float): Loan amount outstanding
    - total_debt_outstanding (float): Total debt outstanding
    - income (float): Borrower's income
    - years_employed (int): Years employed
    - fico_score (int): Borrower's FICO score
    - recovery_rate (float, keyword-only): Fraction of the outstanding loan
      assumed to be recovered after a default; must lie in [0, 1].
      Defaults to 0.10.

    Returns:
    - expected_loss (float): Computed expected loss on the loan

    Raises:
    - ValueError: If recovery_rate is outside the range [0, 1].
    """
    # Reject rates that would yield a negative or inflated loss (also catches NaN).
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {recovery_rate!r}")

    # One row of features, in the same order as the function's parameters.
    # NOTE(review): this order must match the column order the scaler was
    # fitted on -- confirm against the training data columns.
    input_data = [[credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding, income, years_employed, fico_score]]

    # When the scaler was fitted on a DataFrame it records the column names;
    # re-attach them so scikit-learn does not warn about missing feature names.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        input_data = pd.DataFrame(input_data, columns=feature_names)
    input_scaled = scaler.transform(input_data)

    # Column 1 of predict_proba is taken as the probability of default.
    pd_estimate = best_model.predict_proba(input_scaled)[:, 1][0]

    # Compute expected loss (EL = PD * (1 - Recovery Rate) * Loan Amount)
    el = pd_estimate * (1 - recovery_rate) * loan_amt_outstanding

    # Return a plain Python float rather than a NumPy scalar.
    return float(el)

# Example run: expected loss for a single illustrative borrower profile.
sample_loss = expected_loss(
    credit_lines_outstanding=3,
    loan_amt_outstanding=5000,
    total_debt_outstanding=10000,
    income=60000,
    years_employed=5,
    fico_score=650,
)
print(f"Expected Loss: ${sample_loss:.2f}")


Expected Loss: $48.66


