# Anomaly Detection with PyCaret
This notebook uses PyCaret to perform unsupervised anomaly detection on `bank_transactions_data_2.csv`.

**Dataset:**
- Transactional and customer features for 2,512 samples.

**Steps:**
1. Load the dataset
2. Set up PyCaret anomaly detection experiment
3. Create and evaluate an anomaly detection model
4. Assign anomaly labels and save the model

In [1]:
# Import required libraries
import pandas as pd
import numpy as np

In [2]:
# Load the dataset
data = pd.read_csv('bank_transactions_data_2.csv')
data.head()

Unnamed: 0,TransactionID,AccountID,TransactionAmount,TransactionDate,TransactionType,Location,DeviceID,IP Address,MerchantID,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,PreviousTransactionDate
0,TX000001,AC00128,14.09,2023-04-11 16:29:14,Debit,San Diego,D000380,162.198.218.92,M015,ATM,70,Doctor,81,1,5112.21,2024-11-04 08:08:08
1,TX000002,AC00455,376.24,2023-06-27 16:44:19,Debit,Houston,D000051,13.149.61.4,M052,ATM,68,Doctor,141,1,13758.91,2024-11-04 08:09:35
2,TX000003,AC00019,126.29,2023-07-10 18:16:08,Debit,Mesa,D000235,215.97.143.157,M009,Online,19,Student,56,1,1122.35,2024-11-04 08:07:04
3,TX000004,AC00070,184.5,2023-05-05 16:32:11,Debit,Raleigh,D000187,200.13.225.150,M002,Online,26,Student,25,1,8569.06,2024-11-04 08:09:06
4,TX000005,AC00411,13.45,2023-10-16 17:51:24,Credit,Atlanta,D000308,65.164.3.100,M091,Online,26,Student,198,1,7429.4,2024-11-04 08:06:39


In [3]:
# PyCaret anomaly detection setup
from pycaret.anomaly import AnomalyExperiment
exp = AnomalyExperiment()
exp.setup(data=data, session_id=123, normalize=True)

Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(2512, 16)"
2,Transformed data shape,"(2512, 7308)"
3,Numeric features,5
4,Categorical features,11
5,Preprocess,True
6,Imputation type,simple
7,Numeric imputation,mean
8,Categorical imputation,mode
9,Maximum one-hot encoding,-1


<pycaret.anomaly.oop.AnomalyExperiment at 0x7f3334da6310>

In [4]:
# Create an anomaly detection model (Isolation Forest)
knn = exp.create_model('knn')
# Evaluate the model (interactive, notebook only)
exp.evaluate_model(knn)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [5]:
# Assign anomaly labels to the data
anomaly_results = exp.assign_model(knn)
anomaly_results.head()

Unnamed: 0,TransactionID,AccountID,TransactionAmount,TransactionDate,TransactionType,Location,DeviceID,IP Address,MerchantID,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,PreviousTransactionDate,Anomaly,Anomaly_Score
0,TX000001,AC00128,14.09,2023-04-11 16:29:14,Debit,San Diego,D000380,162.198.218.92,M015,ATM,70,Doctor,81,1,5112.209961,2024-11-04 08:08:08,0,114.982454
1,TX000002,AC00455,376.23999,2023-06-27 16:44:19,Debit,Houston,D000051,13.149.61.4,M052,ATM,68,Doctor,141,1,13758.910156,2024-11-04 08:09:35,0,114.29449
2,TX000003,AC00019,126.290001,2023-07-10 18:16:08,Debit,Mesa,D000235,215.97.143.157,M009,Online,19,Student,56,1,1122.349976,2024-11-04 08:07:04,0,114.119878
3,TX000004,AC00070,184.5,2023-05-05 16:32:11,Debit,Raleigh,D000187,200.13.225.150,M002,Online,26,Student,25,1,8569.05957,2024-11-04 08:09:06,0,112.495665
4,TX000005,AC00411,13.45,2023-10-16 17:51:24,Credit,Atlanta,D000308,65.164.3.100,M091,Online,26,Student,198,1,7429.399902,2024-11-04 08:06:39,0,115.033921


In [6]:
exp.plot_model(knn,plot='umap')

In [7]:
# Save the model for future use
exp.save_model(knn, 'best_anomaly_model')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['TransactionAmount', 'CustomerAge',
                                              'TransactionDuration',
                                              'LoginAttempts',
                                              'AccountBalance'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['TransactionID', 'AccountID',
                                              'TransactionDate',
                                              'TransactionType', 'Location',
                                              'DeviceID'...
                                                                     'MerchantID',
                                                                     'Channel',
                                                                     'CustomerOccupation',
 