<h1><center>Fraud Detection System</center></h1>

## **Importing Libraries**

In [50]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import IsolationForest
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from cryptography.fernet import Fernet

## **Generating Transaction Data with Numpy**

In [51]:
# Parameters
num_transactions = 40000   # 40000 transactions
fraud_ratio = 0.1  # 10% of the transactions are fraud

# Transaction Types
transaction_types = ['purchase', 'transfer', 'withdrawal', 'payment']

# Merchant Categories
merchant_categories = ['electronics', 'clothing', 'groceries', 'travel', 'entertainment']

In [52]:
presentday = datetime.now() # or presentday = datetime.today()

yesterday = presentday - timedelta(30)
print(yesterday)

2024-06-14 18:24:00.374078


In [53]:
# Generate Transaction Data
data = {
    'transaction_id': np.arange(num_transactions), # Arranges the num_trasnsactions in ascending order
    'user_id': np.random.randint(100, 1000, num_transactions), # Randomly generates user_id between 100 to 1000
    'transaction_amount': np.round(np.random.uniform(100, 10000, num_transactions), 2), # Randomly generates transaction_amount between 1000 to 10000 for corresponding user_id
    'transaction_time': pd.date_range(start = yesterday, periods=num_transactions, freq='T'), # Generates transaction data for the past 30 days
    'transaction_type': np.random.choice(transaction_types, num_transactions),
    'merchant_category': np.random.choice(merchant_categories, num_transactions),
    'is_fraud': np.random.choice([0, 1], num_transactions, p=[1-fraud_ratio, fraud_ratio]) # Randomly 0's and 1's are assigned to correspoding transactions where 1 denotes fraudulant transaction
}

In [54]:
# Create DataFrame
transaction_data = pd.DataFrame(data)

In [55]:
import os

workingDir = 'FraudDetectionSystem'

# Check if the directory exists, if not, create it
if not os.path.exists(workingDir):
    os.makedirs(workingDir)

In [56]:
subfolder = os.path.join(workingDir, 'data')

# Save dataset to CSV file
data_path = os.path.join(subfolder, 'transaction_data.csv')

transaction_data.to_csv('transaction_data.csv', index=False)


In [57]:
# Display transaction data
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud
0,0,757,1384.46,2024-06-14 18:24:00.374078,payment,clothing,0
1,1,516,1505.20,2024-06-14 18:25:00.374078,payment,clothing,0
2,2,418,7042.24,2024-06-14 18:26:00.374078,payment,groceries,0
3,3,598,5129.82,2024-06-14 18:27:00.374078,transfer,groceries,0
4,4,564,2157.65,2024-06-14 18:28:00.374078,withdrawal,travel,0
...,...,...,...,...,...,...,...
39995,39995,783,8921.67,2024-07-12 12:59:00.374078,transfer,entertainment,0
39996,39996,375,3520.48,2024-07-12 13:00:00.374078,transfer,clothing,0
39997,39997,415,4792.27,2024-07-12 13:01:00.374078,transfer,travel,0
39998,39998,795,3814.37,2024-07-12 13:02:00.374078,payment,electronics,0


## **Generate Encryption Key**

In [58]:
key = Fernet.generate_key()
cipher = Fernet(key)

## **Feature Engineering**

In [59]:
# Load the dataset
transaction_data = pd.read_csv('transaction_data.csv')

In [60]:
# Display transaction data
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud
0,0,757,1384.46,2024-06-14 18:24:00.374078,payment,clothing,0
1,1,516,1505.20,2024-06-14 18:25:00.374078,payment,clothing,0
2,2,418,7042.24,2024-06-14 18:26:00.374078,payment,groceries,0
3,3,598,5129.82,2024-06-14 18:27:00.374078,transfer,groceries,0
4,4,564,2157.65,2024-06-14 18:28:00.374078,withdrawal,travel,0
...,...,...,...,...,...,...,...
39995,39995,783,8921.67,2024-07-12 12:59:00.374078,transfer,entertainment,0
39996,39996,375,3520.48,2024-07-12 13:00:00.374078,transfer,clothing,0
39997,39997,415,4792.27,2024-07-12 13:01:00.374078,transfer,travel,0
39998,39998,795,3814.37,2024-07-12 13:02:00.374078,payment,electronics,0


In [61]:
# Converts transaction_time to datetime format & extracts hour and day of the week from the transaction time
transaction_data['transaction_time'] = pd.to_datetime(transaction_data['transaction_time'])
transaction_data['transaction_hour'] = transaction_data['transaction_time'].dt.hour
transaction_data['transaction_day'] = transaction_data['transaction_time'].dt.dayofweek

In [62]:
# Display transaction data
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud,transaction_hour,transaction_day
0,0,757,1384.46,2024-06-14 18:24:00.374078,payment,clothing,0,18,4
1,1,516,1505.20,2024-06-14 18:25:00.374078,payment,clothing,0,18,4
2,2,418,7042.24,2024-06-14 18:26:00.374078,payment,groceries,0,18,4
3,3,598,5129.82,2024-06-14 18:27:00.374078,transfer,groceries,0,18,4
4,4,564,2157.65,2024-06-14 18:28:00.374078,withdrawal,travel,0,18,4
...,...,...,...,...,...,...,...,...,...
39995,39995,783,8921.67,2024-07-12 12:59:00.374078,transfer,entertainment,0,12,4
39996,39996,375,3520.48,2024-07-12 13:00:00.374078,transfer,clothing,0,13,4
39997,39997,415,4792.27,2024-07-12 13:01:00.374078,transfer,travel,0,13,4
39998,39998,795,3814.37,2024-07-12 13:02:00.374078,payment,electronics,0,13,4


## **Data Encryption**

In [63]:
# Encrypts the transaction_id and transaction_time columns
transaction_data['transaction_id'] = transaction_data['transaction_id'].apply(lambda x: cipher.encrypt(str(x).encode()).decode())
transaction_data['transaction_time'] = transaction_data['transaction_time'].apply(lambda x: cipher.encrypt(str(x).encode()).decode())
transaction_data['user_id'] = transaction_data['user_id'].apply(lambda x: cipher.encrypt(str(x).encode()).decode())

# Saves the encryption key into file
with open('encryption_key.key', 'wb') as key_file:
    key_file.write(key)

In [64]:
# Display transaction data
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud,transaction_hour,transaction_day
0,gAAAAABmlBfBaM8N0w7aPm3ccTJ1MmgPgGn7dNGGWK1UPY...,gAAAAABmlBfI-ZZn75vfZInvsFW4HDMvIO1ZCiN83Ey-bq...,1384.46,gAAAAABmlBfFHFKnRGBmcQUd4kCxse8BJth9Zn1k8ySUq-...,payment,clothing,0,18,4
1,gAAAAABmlBfBnMp-_5gzhBh0YiCDFbFA8mPUwDs9ciKY35...,gAAAAABmlBfIkn65m4C-rzwRmACU_AZ06WihJhFyoYgYvr...,1505.20,gAAAAABmlBfF_20qDJTACjv7s5GpE1CKMRF_lCkx74fZ0C...,payment,clothing,0,18,4
2,gAAAAABmlBfBZ7h-jtfuUgPGRi6Fjo2PPGY7x03TqdUr9u...,gAAAAABmlBfI3dttzVTjdUmMrEkXQd6vCd4Qno_Bx3IbBp...,7042.24,gAAAAABmlBfFG4pVmaxHSDKdVoQ02FwDJJbcyNpZsdHM4O...,payment,groceries,0,18,4
3,gAAAAABmlBfBRqHaOXT3LUFBsR-6ueieIZOThbDDxA0x_R...,gAAAAABmlBfIgg49Ss1gg5NAH389KrAYLJcpspLZ2YOfLE...,5129.82,gAAAAABmlBfFtb883J1CCiXWXtioXJvRIarh0s2Eq7RUEC...,transfer,groceries,0,18,4
4,gAAAAABmlBfBurVjGBfOAsKiYU993iDr0dUsajV8Yc8Ms9...,gAAAAABmlBfInxEsUNzYP9DufJtF95ebid5HtB2XMLV5x3...,2157.65,gAAAAABmlBfFhY37Z32Y65tMXVidDSQpDlAtS93-sjVxmW...,withdrawal,travel,0,18,4
...,...,...,...,...,...,...,...,...,...
39995,gAAAAABmlBfF_hEmCNToI9Ip6OrxCGqDVQRtatTjQjeglH...,gAAAAABmlBfL7jTLEc_s57ToG7iiMEbcH-JX9wTV1pj9uo...,8921.67,gAAAAABmlBfI87ilZhBR_rZ2pcCb2U8fgl7ZVT9lwE6FQ1...,transfer,entertainment,0,12,4
39996,gAAAAABmlBfFtlgza7ZNstasEBODuXCrJ5AGZlKVTI5Wm8...,gAAAAABmlBfLAqehgWQJc4leuLF1tgc-y5MRV4A2agB0Kh...,3520.48,gAAAAABmlBfI3-UNKXh_OpjdUFLbibKsXtXxG2qoOfSWDP...,transfer,clothing,0,13,4
39997,gAAAAABmlBfF5IEjcFB49kfXkH84Mobwf9dkRNbX4JdrIO...,gAAAAABmlBfL3oZFtYmLXtyqi-CfEeaE-j5oEWulBank8v...,4792.27,gAAAAABmlBfIBBR8PGXIsP3UL3Aq47c9dN5LXNPJVESJ5X...,transfer,travel,0,13,4
39998,gAAAAABmlBfF4J8eb3X7uHHQfHZSKkewGvXoun2kxsPngu...,gAAAAABmlBfL4oijCl6iFlZ2p6gcMu39406gpR1Kv7guWR...,3814.37,gAAAAABmlBfIOLfkUyjWSJ3SEYm6y9o_LEdy33BCrOi7qL...,payment,electronics,0,13,4


## **Data Anonymization**

In [65]:
# Additional Feature: According to the business needs, the Sensitive Data can also be dropped to make the user's transaction untraceable
# transaction_data = transaction_data.drop(columns=['user_id']

## **Differential Privacy**

In [66]:
# creates numerical and categorical features
numerical_features = ['transaction_amount']
categorical_features = ['transaction_type', 'merchant_category']

In [67]:
# Add_noise function - Adds noise to the numerical features to ensure differential privacy
def add_noise(data, epsilon=0.1):
    noise = np.random.laplace(0, 1/epsilon, data.shape)
    return data + noise

transaction_data[numerical_features] = add_noise(transaction_data[numerical_features])

## **Encode Categorical Features**

In [68]:
# Creates a preprocessing pipeline to scale numerical features and one-hot encode categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

In [69]:
# Apply the preprocessing pipeline.

# Drops 3 columns from transaction data and rest of the columns are assigned to X variable
x = transaction_data.drop(columns=['transaction_id', 'transaction_time', 'is_fraud'])

# Y vairable is assigned 'is_fraud' column
y = transaction_data['is_fraud']

# Preprocess the features of X and
x_preprocessed = preprocessor.fit_transform(x)

In [70]:
# Convert the preprocessed data back to a DataFrame for better visualization
x_preprocessed_df = pd.DataFrame(x_preprocessed, columns=preprocessor.get_feature_names_out())

# Converts the preprocessed data back to a DataFrame and adds transaction_hour and transaction_day
x_preprocessed_df['transaction_hour'] = transaction_data['transaction_hour']
x_preprocessed_df['transaction_day'] = transaction_data['transaction_day']

## **Data Splitting**

In [71]:
# Dataset is split into 2 sets: Training and testing. Data is split in the ratio of 80:20, where 20% of the data is used for testing the model's evaluation.
x_train, x_test, y_train, y_test = train_test_split(x_preprocessed_df, y, test_size=0.2)

## **Anomaly Detection using Isolation Forest Algorithm**

In [72]:
iso_forest = IsolationForest(contamination=0.05)
iso_forest.fit(x_train)

train_anomalies = iso_forest.predict(x_train)
test_anomalies = iso_forest.predict(x_test)

train_anomalies = np.where(train_anomalies == -1, 1, 0)
test_anomalies = np.where(test_anomalies == -1, 1, 0)

x_train['anomaly'] = train_anomalies
x_test['anomaly'] = test_anomalies



## **Pattern Recognition using Neural Network**

In [73]:
model = Sequential()
model.add(Dense(64, input_dim=x_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [74]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [75]:
model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7ac88719baf0>

In [76]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {accuracy}')

Test Accuracy: 0.9012500047683716


In [77]:
y_pred = model.predict(x_test)
y_pred = (y_pred > 0.5).astype(int)



## **Model Evaluation**

In [79]:
from sklearn.metrics import classification_report, confusion_matrix
from simple_colors import *

print(blue("Confusion matrix:", ["bold"]))
print(confusion_matrix(y_test, y_pred))

print(blue('\nClassification Reports:', ["bold"]))
print(classification_report(y_test, y_pred))


[1;34mConfusion matrix:[0m
[[7210    0]
 [ 790    0]]
[1;34m
Classification Reports:[0m
              precision    recall  f1-score   support

           0       0.90      1.00      0.95      7210
           1       0.00      0.00      0.00       790

    accuracy                           0.90      8000
   macro avg       0.45      0.50      0.47      8000
weighted avg       0.81      0.90      0.85      8000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## **Displaying Enrcypted Transaction Data**

In [80]:
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud,transaction_hour,transaction_day
0,gAAAAABmlBfBaM8N0w7aPm3ccTJ1MmgPgGn7dNGGWK1UPY...,gAAAAABmlBfI-ZZn75vfZInvsFW4HDMvIO1ZCiN83Ey-bq...,1381.890979,gAAAAABmlBfFHFKnRGBmcQUd4kCxse8BJth9Zn1k8ySUq-...,payment,clothing,0,18,4
1,gAAAAABmlBfBnMp-_5gzhBh0YiCDFbFA8mPUwDs9ciKY35...,gAAAAABmlBfIkn65m4C-rzwRmACU_AZ06WihJhFyoYgYvr...,1496.852507,gAAAAABmlBfF_20qDJTACjv7s5GpE1CKMRF_lCkx74fZ0C...,payment,clothing,0,18,4
2,gAAAAABmlBfBZ7h-jtfuUgPGRi6Fjo2PPGY7x03TqdUr9u...,gAAAAABmlBfI3dttzVTjdUmMrEkXQd6vCd4Qno_Bx3IbBp...,7023.063859,gAAAAABmlBfFG4pVmaxHSDKdVoQ02FwDJJbcyNpZsdHM4O...,payment,groceries,0,18,4
3,gAAAAABmlBfBRqHaOXT3LUFBsR-6ueieIZOThbDDxA0x_R...,gAAAAABmlBfIgg49Ss1gg5NAH389KrAYLJcpspLZ2YOfLE...,5130.829439,gAAAAABmlBfFtb883J1CCiXWXtioXJvRIarh0s2Eq7RUEC...,transfer,groceries,0,18,4
4,gAAAAABmlBfBurVjGBfOAsKiYU993iDr0dUsajV8Yc8Ms9...,gAAAAABmlBfInxEsUNzYP9DufJtF95ebid5HtB2XMLV5x3...,2160.250393,gAAAAABmlBfFhY37Z32Y65tMXVidDSQpDlAtS93-sjVxmW...,withdrawal,travel,0,18,4
...,...,...,...,...,...,...,...,...,...
39995,gAAAAABmlBfF_hEmCNToI9Ip6OrxCGqDVQRtatTjQjeglH...,gAAAAABmlBfL7jTLEc_s57ToG7iiMEbcH-JX9wTV1pj9uo...,8942.273210,gAAAAABmlBfI87ilZhBR_rZ2pcCb2U8fgl7ZVT9lwE6FQ1...,transfer,entertainment,0,12,4
39996,gAAAAABmlBfFtlgza7ZNstasEBODuXCrJ5AGZlKVTI5Wm8...,gAAAAABmlBfLAqehgWQJc4leuLF1tgc-y5MRV4A2agB0Kh...,3514.627601,gAAAAABmlBfI3-UNKXh_OpjdUFLbibKsXtXxG2qoOfSWDP...,transfer,clothing,0,13,4
39997,gAAAAABmlBfF5IEjcFB49kfXkH84Mobwf9dkRNbX4JdrIO...,gAAAAABmlBfL3oZFtYmLXtyqi-CfEeaE-j5oEWulBank8v...,4796.516440,gAAAAABmlBfIBBR8PGXIsP3UL3Aq47c9dN5LXNPJVESJ5X...,transfer,travel,0,13,4
39998,gAAAAABmlBfF4J8eb3X7uHHQfHZSKkewGvXoun2kxsPngu...,gAAAAABmlBfL4oijCl6iFlZ2p6gcMu39406gpR1Kv7guWR...,3814.870288,gAAAAABmlBfIOLfkUyjWSJ3SEYm6y9o_LEdy33BCrOi7qL...,payment,electronics,0,13,4


In [81]:
# Check for present working directory
%pwd

'/content'

In [82]:
# Saves the encrypted data into a CSV file
transaction_data.to_csv('encrypted_transaction_data.csv', index=False)

## **Decrypting the encrypted PII**

In [83]:
transaction_data['transaction_id'] = transaction_data['transaction_id'].apply(lambda x: cipher.decrypt(x.encode()).decode())
transaction_data['transaction_time'] = transaction_data['transaction_time'].apply(lambda x: cipher.decrypt(x.encode()).decode())
transaction_data['user_id'] = transaction_data['user_id'].apply(lambda x: cipher.decrypt(x.encode()).decode())

In [84]:
# Convert transaction_time back to datetime
transaction_data['transaction_time'] = pd.to_datetime(transaction_data['transaction_time'])

In [85]:
transaction_data

Unnamed: 0,transaction_id,user_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud,transaction_hour,transaction_day
0,0,757,1381.890979,2024-06-14 18:24:00.374078,payment,clothing,0,18,4
1,1,516,1496.852507,2024-06-14 18:25:00.374078,payment,clothing,0,18,4
2,2,418,7023.063859,2024-06-14 18:26:00.374078,payment,groceries,0,18,4
3,3,598,5130.829439,2024-06-14 18:27:00.374078,transfer,groceries,0,18,4
4,4,564,2160.250393,2024-06-14 18:28:00.374078,withdrawal,travel,0,18,4
...,...,...,...,...,...,...,...,...,...
39995,39995,783,8942.273210,2024-07-12 12:59:00.374078,transfer,entertainment,0,12,4
39996,39996,375,3514.627601,2024-07-12 13:00:00.374078,transfer,clothing,0,13,4
39997,39997,415,4796.516440,2024-07-12 13:01:00.374078,transfer,travel,0,13,4
39998,39998,795,3814.870288,2024-07-12 13:02:00.374078,payment,electronics,0,13,4


In [86]:
# Check for present working directory
%pwd

'/content'

In [87]:
# If condition used to drop columns added during feature engineering
if 'transaction_hour' in transaction_data.columns:
    transaction_data = transaction_data.drop(columns=['transaction_hour'])

if 'transaction_day' in transaction_data.columns:
    transaction_data = transaction_data.drop(columns=['transaction_day'])

In [88]:
# Saves the decrypted data into a CSV file
transaction_data.to_csv('decrypted_transaction_data.csv', index=False)