In [4]:
# --- Imports ---
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

print("--- Reverting to the Simpler, Better-Performing Model ---")

# --- Step 1: Load the Dataset ---
# Reads the transaction CSV into `df` and parses the 'timestamp' column into
# datetimes so the later steps can use the `.dt` accessor on it.
print("Loading 'upi_transactions.csv'...")
try:
    # Keep the try body minimal: only the read can raise FileNotFoundError.
    df = pd.read_csv('upi_transactions.csv')
except FileNotFoundError:
    print("Error: 'upi_transactions.csv' not found.")
    # Every later step needs `df`; re-raise so execution stops here instead
    # of failing further down with an unrelated NameError.
    raise
else:
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    print("...Dataset loaded successfully.")

# --- Step 2: Simple Feature Engineering ---
# Derives a single extra column, 'hour_of_day', from the parsed timestamps.
print("Creating simple features...")
transaction_hours = df['timestamp'].dt.hour  # hour component of each timestamp
df['hour_of_day'] = transaction_hours
print("...'hour_of_day' feature created.")

# --- Step 3: Train the Model with Simple Features ---
# Fits an IsolationForest on two columns only; the 'is_fraud' labels are not
# passed to fit() and are used solely to derive the contamination setting.
print("\nTraining the model on the simple but effective feature set...")

# Only the two features that are known to work well
features = ['amount', 'hour_of_day']
X = df[features]
y = df['is_fraud']

# Mean of the 'is_fraud' column, handed to the forest as its expected
# outlier proportion.
contamination_rate = y.mean()
forest_params = {
    'n_estimators': 100,
    'contamination': contamination_rate,
    'random_state': 42,  # fixed seed for repeatable runs
}
# fit() returns the estimator itself, so construction and fitting can chain.
model = IsolationForest(**forest_params).fit(X)

print("...Model training complete.")

# --- Step 4: Save the Trained Model ---
# Persists the fitted estimator with joblib.
model_path = 'fraud_model.joblib'
joblib.dump(model, model_path)
print("Better-performing model saved to 'fraud_model.joblib'")

# --- Step 5: Evaluate the Model's Performance ---
# NOTE: X is the same data the model was fitted on in Step 3, so the scores
# below are in-sample, not a held-out estimate.
print("\n--- Model Evaluation ---")
predictions = model.predict(X)

# The model marks anomalies with -1; recode to the 1 = fraud / 0 = normal
# convention used by the 'is_fraud' labels.
flagged_as_anomaly = predictions == -1
pred_labels = np.where(flagged_as_anomaly, 1, 0)

class_names = ['Normal (0)', 'Fraud (1)']
report = classification_report(y, pred_labels, target_names=class_names)

print("Classification Report:", report, sep="\n")


--- Reverting to the Simpler, Better-Performing Model ---
Loading 'upi_transactions.csv'...
...Dataset loaded successfully.
Creating simple features...
...'hour_of_day' feature created.

Training the model on the simple but effective feature set...
...Model training complete.
Better-performing model saved to 'fraud_model.joblib'

--- Model Evaluation ---
Classification Report:
              precision    recall  f1-score   support

  Normal (0)       0.98      0.98      0.98      1000
   Fraud (1)       0.40      0.40      0.40        30

    accuracy                           0.97      1030
   macro avg       0.69      0.69      0.69      1030
weighted avg       0.97      0.97      0.97      1030

