In [3]:
import pandas as pd
import numpy as np
import joblib

# Restore the previously trained model from disk.
# NOTE: joblib artifacts are pickle-based, so only load files from a trusted source.
model = joblib.load("login_isolation_forest.pkl")

# Read the events that already carry the model's scores and anomaly flags.
df = pd.read_csv("login_events_scored.csv")

# Confirm both loads succeeded and show the first few rows as a sanity check.
print("Model & Scored Data Loaded", df.head(), sep="\n")


Model & Scored Data Loaded
   login_hour  login_dayofweek  is_weekend  time_since_last_login  \
0   -0.345567         0.514377           0              -0.295274   
1    0.236392         1.020157           1              -0.872549   
2   -0.927526         0.008598           0              -0.524244   
3    0.236392        -1.508739           0              -0.555857   
4    1.400309         0.514377           0               0.148359   

   device_changed  ip_changed  location_changed  LoginAttempts  failed_login  \
0               1           1                 1       0.140228             0   
1               0           1                 0       0.562603             0   
2               1           1                 0      -0.704521             0   
3               1           1                 1       1.407352             0   
4               1           1                 1       0.562603             0   

   high_login_attempts  anomaly_label  anomaly_score  is_anomaly  
0         

In [4]:
# Headline counts for the scored events. `is_anomaly` is summed directly,
# so it is expected to be a 0/1 flag (1 = flagged as anomalous).
total_events = df.shape[0]
anomalies = df["is_anomaly"].sum()
normal = total_events - anomalies

print(
    "\n--- Unsupervised Evaluation Summary ---",
    f"Total Events     : {total_events}",
    f"Normal Events    : {normal}",
    f"Anomalous Events : {anomalies}",
    sep="\n",
)
# Share of events flagged as anomalous, as a percentage with two decimals.
print(f"Anomaly Rate (%) : {anomalies / total_events * 100:.2f}")



--- Unsupervised Evaluation Summary ---
Total Events     : 5000
Normal Events    : 4900
Anomalous Events : 100
Anomaly Rate (%) : 2.00


In [5]:
# Distribution summary (count, mean, std, quartiles, extremes) of the anomaly scores.
print("\n--- Anomaly Score Statistics ---")
print(df["anomaly_score"].describe())

# The ten events with the smallest scores, shown next to their anomaly flag.
print("\nLowest (Most Anomalous) Scores:")
print(
    df.sort_values(by="anomaly_score")
    .loc[:, ["anomaly_score", "is_anomaly"]]
    .head(10)
)



--- Anomaly Score Statistics ---
count    5000.000000
mean        0.126407
std         0.053825
min        -0.074401
25%         0.092367
50%         0.138149
75%         0.167741
max         0.220140
Name: anomaly_score, dtype: float64

Lowest (Most Anomalous) Scores:
      anomaly_score  is_anomaly
1995      -0.074401           1
4326      -0.054692           1
3945      -0.051134           1
1691      -0.048928           1
3208      -0.047536           1
4730      -0.047204           1
2804      -0.046514           1
4899      -0.045867           1
338       -0.045575           1
258       -0.043869           1


In [6]:
# Score cut-offs at the 1st and 5th percentiles of the anomaly-score column.
threshold_1 = np.percentile(df["anomaly_score"], 1)
threshold_5 = np.percentile(df["anomaly_score"], 5)

print(
    "\n--- Score Thresholds ---",
    f"1% percentile score : {threshold_1}",
    f"5% percentile score : {threshold_5}",
    sep="\n",
)

# Number of events at or below each cut-off (the comparison is inclusive).
detected_below_1 = df["anomaly_score"].le(threshold_1).sum()
detected_below_5 = df["anomaly_score"].le(threshold_5).sum()

print(
    f"Events below 1% threshold : {detected_below_1}",
    f"Events below 5% threshold : {detected_below_5}",
    sep="\n",
)



--- Score Thresholds ---
1% percentile score : -0.01627916852943319
5% percentile score : 0.023356193753878436
Events below 1% threshold : 50
Events below 5% threshold : 250


In [7]:
# Stability check: add small Gaussian jitter to the feature matrix, re-run the
# model, and report the fraction of events whose prediction is unchanged.

# Feature matrix = every column except the three model-output columns.
X = df.drop(columns=['anomaly_label', 'anomaly_score', 'is_anomaly']).values

# Draw the jitter from a seeded generator so the reported consistency is the
# same on every Restart & Run All (the unseeded global NumPy state gave a
# different noise matrix on each run). Noise is zero-mean with standard
# deviation 0.001 and is applied to every feature column alike.
# NOTE(review): 0.001 is a very small perturbation; consider also reporting
# consistency at larger noise scales before drawing conclusions.
noise = np.random.default_rng(42).normal(0, 0.001, X.shape)
X_noisy = X + noise

pred_original = model.predict(X)
pred_noisy = model.predict(X_noisy)

# Fraction of rows where the original and perturbed predictions agree.
stability = np.mean(pred_original == pred_noisy)

print("\n--- Stability Test ---")
print(f"Prediction Consistency : {stability * 100:.2f}%")



--- Stability Test ---
Prediction Consistency : 100.00%
