In [None]:
# These are usually pre-installed in Colab
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid look to the plots drawn in this notebook
sns.set(style="whitegrid")


In [None]:
# Ask Colab for a file upload and keep the result in `uploaded`.
# A later cell indexes `uploaded` by filename and wraps the value in
# io.BytesIO, so the value stored per filename is the raw file content.
from google.colab import files
uploaded = files.upload()


In [None]:
import io

# Name the dataset is expected to be uploaded under; change it if your file
# is called something else.
EXPECTED_FILENAME = 'Geldium_Dataset.csv'

# Resolve which uploaded entry to read. Prefer the expected name; if exactly
# one file was uploaded under a different name (for example it was renamed
# before upload), use that one instead of failing with a bare KeyError.
if EXPECTED_FILENAME in uploaded:
    csv_name = EXPECTED_FILENAME
elif len(uploaded) == 1:
    csv_name = next(iter(uploaded))
else:
    # Same exception type as a failed dictionary lookup, but with a message
    # that tells the reader what was actually uploaded.
    raise KeyError(
        f"Expected '{EXPECTED_FILENAME}' among the uploaded files, "
        f"got: {list(uploaded)}"
    )

# Parse the uploaded bytes as CSV
df = pd.read_csv(io.BytesIO(uploaded[csv_name]))

# Preview the dataset
df.head()


In [None]:
# Dimensions and column names
print("Shape:", df.shape)
print("\nColumns:\n", list(df.columns))

# Per-column dtype and non-null counts
df.info()

# Descriptive statistics (displayed as the cell's output)
df.describe()


In [None]:
# Missing-value count per column, largest first
(
    df.isnull()
    .sum()
    .sort_values(ascending=False)
)


In [None]:
# Impute missing income with the median.
# The filled Series is assigned back to the column rather than calling
# fillna(inplace=True) on df['Income']: that in-place call operates on an
# intermediate object, which triggers a FutureWarning on recent pandas and
# leaves df untouched once Copy-on-Write is active.
df['Income'] = df['Income'].fillna(df['Income'].median())


In [None]:
# Impute missing credit utilization with the mean.
# The filled Series is assigned back to the column rather than calling
# fillna(inplace=True) on df['Credit_Utilization']: that in-place call
# operates on an intermediate object, which triggers a FutureWarning on
# recent pandas and leaves df untouched once Copy-on-Write is active.
df['Credit_Utilization'] = df['Credit_Utilization'].fillna(df['Credit_Utilization'].mean())


In [None]:
# Count the missing values in each column
df.isnull().sum(axis=0)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of credit utilization, one box per delinquency class
plt.figure(figsize=(8, 5))
sns.boxplot(data=df, x='Delinquent_Account', y='Credit_Utilization')
plt.xlabel('Delinquent Account (0=No, 1=Yes)')
plt.ylabel('Credit Utilization (%)')
plt.title('Credit Utilization vs. Delinquency')
plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Select features
X = df[['Income', 'Credit_Utilization', 'Missed_Payments', 'Debt_to_Income_Ratio']]
y = df['Delinquent_Account']

# Train-test split (80/20, fixed seed). Stratify on the target so the train
# and test sets keep the same class proportions as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build model. The features are passed in unscaled, so give the solver more
# room than the library's default iteration cap; a fit that already converged
# under the default is unaffected.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict & evaluate on the held-out test set
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Target column: 0 = not delinquent, 1 = delinquent
y = df['Delinquent_Account']

# Predictor columns used by the model
features = [
    'Income',
    'Credit_Utilization',
    'Missed_Payments',
    'Debt_to_Income_Ratio',
]
X = df[features]


In [None]:
# Hold out 20% of the rows for evaluation, with a fixed seed for
# reproducibility. Stratify on the target so the train and test sets keep
# the same class proportions as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
model = LogisticRegression()
model.fit(X_train, y_train)


In [None]:
# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Compute both evaluation summaries first, then print them
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)
