In [44]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,f1_score,precision_score,recall_score

In [45]:
df = pd.read_csv("creditcard.csv")

X = df.drop("Class",axis = 1)
Y = df["Class"]
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.3,random_state = 42,stratify=Y)
X_normal = X_train[Y_train==0]

In [46]:
scalar = StandardScaler()
X_normal_scaled = scalar.fit_transform(X_normal)
X_test_scaled = scalar.transform(X_test)

In [47]:
mu = np.mean(X_normal_scaled,axis = 0)
sigma = np.cov(X_normal_scaled,rowvar=False,bias = True)

inv_Sigma = np.linalg.inv(sigma)
det_Sigma = np.linalg.det(sigma)

In [48]:
def gaussian_prob(X):
    diff = X-mu
    exp_term = np.sum(diff @ inv_Sigma * diff, axis = 1)
    return np.exp(-0.5*exp_term) / np.sqrt((2*np.pi)**X.shape[1]*det_Sigma)

In [None]:
p = gaussian_prob(X_test_scaled)
percentiles = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 1]
best_f1 = -1
best_threshold = None

for t in percentiles:
    threshold = np.percentile(p, t)
    pred = (p < threshold).astype(int)
    current_f1 = f1_score(Y_test, pred)

    print(f"Percentile {t}% → F1 Score = {current_f1:.4f}")

    if current_f1 > best_f1:
        best_f1 = current_f1
        best_threshold = threshold

print("\n✔ Best Threshold Selected:", best_threshold)

# Final prediction on validation data
final_pred = (p < best_threshold).astype(int)

# Final Evaluation
print("\nGaussian Model Final Results:")
print("Precision:", precision_score(Y_test, final_pred))
print("Recall:", recall_score(Y_test, final_pred))
print("F1 Score:", f1_score(Y_test, final_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, final_pred))

Percentile 0.01% → F1 Score = 0.0000
Percentile 0.05% → F1 Score = 0.0000
Percentile 0.1% → F1 Score = 0.0000
Percentile 0.2% → F1 Score = 0.4828
Percentile 0.3% → F1 Score = 0.4988
Percentile 0.5% → F1 Score = 0.3993
Percentile 1% → F1 Score = 0.2313

✔ Best Threshold Selected: 1.5071071196616182e-169

Gaussian Model Final Results:
Precision: 0.39299610894941633
Recall: 0.6824324324324325
F1 Score: 0.49876543209876545

Confusion Matrix:
[[85139   156]
 [   47   101]]
