# In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Train and evaluate a random-forest classifier that predicts the 'Disbursed'
# column of the loan spreadsheet from the remaining columns.

# Load the data
data = pd.read_excel('loan data (1).xlsx')

# Preprocessing
# Fill missing credit scores with the column's own median. Assign the result
# back instead of using `inplace=True` on a column selection: that form is
# chained assignment, which newer pandas versions warn about and which does
# not modify `data` under copy-on-write.
data['Cibil Score'] = data['Cibil Score'].fillna(data['Cibil Score'].median())

# Encode categorical data (one indicator column per distinct 'Location' value)
data = pd.get_dummies(data, columns=['Location'])

# Scale numerical features
# NOTE(review): the scaler is fit on every row before the train/test split, so
# test-set statistics influence the transform. Consider fitting on the
# training rows only if strict hold-out evaluation matters.
# NOTE(review): 'Unnamed: 4' looks like a header-less spreadsheet column that
# pandas auto-named -- confirm it is a genuine feature.
scaler = StandardScaler()
numerical_features = ['Cibil Score', 'Age', 'Net Salary', 'Unnamed: 4']
data[numerical_features] = scaler.fit_transform(data[numerical_features])

# Encode target variable as integer class labels
label_encoder = LabelEncoder()
data['Disbursed'] = label_encoder.fit_transform(data['Disbursed'])

# Split the data
# 'Disbursed amount' is excluded from the features along with the target.
# NOTE(review): presumably because it is only known after disbursal -- confirm.
X = data.drop(columns=['Disbursed', 'Disbursed amount'])
y = data['Disbursed']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model training
# Seed the forest (same seed as the split) so reported metrics are reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Model evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
# Uses the probability of the second class (column 1).
# NOTE(review): assumes 'Disbursed' has exactly two classes -- confirm.
roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("ROC-AUC Score:", roc_auc)
