In [1]:
# ---------------------------------------------------------
# TRANSACTION ANOMALY DETECTION — ISOLATION FOREST TRAINING
# ---------------------------------------------------------

import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import joblib


In [2]:
# ---------------------------------------------------------
# 1. Read the preprocessed transaction feature table
# ---------------------------------------------------------

df = pd.read_csv("transaction_features.csv")

# Confirm the load and preview the first five rows.
print("Loaded transaction_features.csv", df.head(), sep="\n")


# Every column of the CSV is handed to the model as a feature; no
# column is dropped or selected here. The file is expected to hold only
# already-scaled numeric features.
# NOTE(review): the preview lists TransactionType / Location / DeviceID
# columns; confirm these are encoded features and not raw identifiers.
X = df.to_numpy()     # plain numpy matrix, same row order as df

Loaded transaction_features.csv
   TransactionAmount      hour  day_of_week       day  time_since_last  \
0          -0.855820  0.511423     1.688773 -0.094437        -0.267207   
1          -0.289919 -0.856729    -0.296655 -0.208599         0.180808   
2           0.614606 -0.856729    -0.296655 -0.665243        -0.267207   
3          -0.816319 -0.856729    -0.296655  1.389657        -0.053405   
4          -0.976312 -0.856729     1.688773 -1.236049         0.308929   

   tx_last_1hr  tx_last_24hr  amount_zscore  amount_to_balance  \
0         -1.5          -1.5      -0.789118          -0.335036   
1         -1.0          -1.0       0.789118          -0.291844   
2         -0.5          -0.5       1.044987           0.419096   
3          0.0           0.0      -1.336840          -0.371645   
4          0.5           0.5      -1.603154          -0.388045   

   device_changed  ...  location_changed  TransactionType  Location  DeviceID  \
0        0.019956  ...          0.152374     

In [3]:

# ---------------------------------------------------------
# 2. Fit the Isolation Forest on the full feature matrix
# ---------------------------------------------------------

# fit() returns the estimator itself, so construction and training
# can be chained into a single expression.
iso_tx = IsolationForest(
    n_estimators=350,       # number of isolation trees in the ensemble
    max_samples="auto",     # let scikit-learn choose the per-tree sample size
    contamination=0.03,     # expected share of anomalies (1–5% is typical)
    n_jobs=-1,              # use all available CPU cores
    random_state=42,        # fixed seed so reruns build the same forest
).fit(X)

print("\nIsolation Forest training completed.")


Isolation Forest training completed.


In [4]:
# ---------------------------------------------------------
# 3. Label every transaction as normal or anomalous
# ---------------------------------------------------------

pred = iso_tx.predict(X)

# scikit-learn labels inliers as 1 and outliers as -1.
# Re-encode as a 0/1 flag: 1 = anomaly, 0 = normal.
# assign() returns a new frame, so `df` itself is left untouched.
df_pred = df.assign(anomaly=(pred == -1).astype(int))

print("\nAnomaly counts:", df_pred["anomaly"].value_counts(), sep="\n")


Anomaly counts:
anomaly
0    2436
1      76
Name: count, dtype: int64


In [5]:
# ---------------------------------------------------------
# 4. Attach the continuous anomaly score to each row
# ---------------------------------------------------------

# decision_function: the lower the value, the more abnormal the row;
# negative scores correspond to predicted outliers.
scores = iso_tx.decision_function(X)
df_pred["anomaly_score"] = scores

# Preview the flag next to its score for the first five rows.
print(
    "\nExample anomaly scores:",
    df_pred.head()[["anomaly", "anomaly_score"]],
    sep="\n",
)



Example anomaly scores:
   anomaly  anomaly_score
0        0       0.078317
1        0       0.067355
2        0       0.061936
3        0       0.073019
4        0       0.052262


In [6]:
# ---------------------------------------------------------
# 5. Persist the fitted model to disk
# ---------------------------------------------------------

# joblib serialises the estimator with pickle under the hood, so the
# resulting file should only ever be loaded from a trusted location.
joblib.dump(
    value=iso_tx,
    filename="transaction_isolation_forest_model.pkl",
)

print("\nModel saved: transaction_isolation_forest_model.pkl")



Model saved: transaction_isolation_forest_model.pkl


In [7]:
# ---------------------------------------------------------
# 6. Export the scored transactions for later analysis
# ---------------------------------------------------------

# index=False keeps the row index out of the CSV, so the file holds
# only the input columns plus the columns added to df_pred.
df_pred.to_csv(path_or_buf="transaction_predictions.csv", index=False)

print(
    "\nSaved: transaction_predictions.csv",
    "\nTransaction Isolation Forest training DONE!",
    sep="\n",
)


Saved: transaction_predictions.csv

Transaction Isolation Forest training DONE!
