In [4]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import io
import requests


file_path = "https://www.dropbox.com/scl/fi/113aenklgr6x0oru9l58k/Task-3-and-4_Loan_Data.xlsx?rlkey=c864pxz4wzz28ybaswrqmrunj&dl=1"



response = requests.get(file_path)
response.raise_for_status()


df = pd.read_excel(io.BytesIO(response.content))


num_buckets = 10


fico_scores = df['fico_score'].values.reshape(-1, 1)


kmeans = KMeans(n_clusters=num_buckets, random_state=42, n_init=10)
df['bucket'] = kmeans.fit_predict(fico_scores)


bucket_boundaries = np.sort(kmeans.cluster_centers_.flatten())


df['rating'] = df['bucket'].rank(method="dense", ascending=False).astype(int)

bucket_stats = df.groupby('rating').agg(
    min_fico=('fico_score', 'min'),
    max_fico=('fico_score', 'max'),
    mean_fico=('fico_score', 'mean'),
    num_records=('fico_score', 'count'),
    num_defaults=('default', 'sum')
).reset_index()


bucket_stats['default_rate'] = bucket_stats['num_defaults'] / bucket_stats['num_records']


print(bucket_stats)


   rating  min_fico  max_fico   mean_fico  num_records  num_defaults  \
0       1       661       685  672.511710         1409           134   
1       2       612       636  624.213755         1614           272   
2       3       408       512  489.308017          237           160   
3       4       553       584  569.994516         1094           357   
4       5       756       850  779.060606          231             4   
5       6       637       660  648.314672         1554           205   
6       7       716       755  731.829001          731            29   
7       8       585       611  598.523948         1378           331   
8       9       686       715  699.403523         1192            91   
9      10       513       552  535.930357          560           268   

   default_rate  
0      0.095103  
1      0.168525  
2      0.675105  
3      0.326325  
4      0.017316  
5      0.131918  
6      0.039672  
7      0.240203  
8      0.076342  
9      0.478571  
