# **Importing Libraries**

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import IsolationForest
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from cryptography.fernet import Fernet

# **Generating Transaction Data with Numpy**

In [4]:
# Parameters
num_transactions = 10000
fraud_ratio = 0.05  # 5% of the transactions are fraud

# Transaction Types
transaction_types = ['purchase', 'transfer', 'withdrawal', 'payment']

# Merchant Categories
merchant_categories = ['electronics', 'clothing', 'groceries', 'travel', 'entertainment']

In [5]:
# Generate Transaction Data
data = {
    'transaction_id': np.arange(num_transactions),
    'user_id': np.random.randint(1, 1000, num_transactions),
    'transaction_amount': np.round(np.random.uniform(1, 1000, num_transactions), 2),
    'transaction_time': pd.date_range(start='2023-01-01', periods=num_transactions, freq='T'),
    'transaction_type': np.random.choice(transaction_types, num_transactions),
    'merchant_category': np.random.choice(merchant_categories, num_transactions),
    'is_fraud': np.random.choice([0, 1], num_transactions, p=[1-fraud_ratio, fraud_ratio])
}

In [6]:
# Create DataFrame
transaction_data = pd.DataFrame(data)

In [7]:
# Save dataset to CSV file
transaction_data.to_csv('transaction_data.csv', index=False)

In [9]:
# Display first 10 transactions
print(transaction_data.head(10))

   transaction_id  user_id  transaction_amount    transaction_time  \
0               0      421              876.43 2023-01-01 00:00:00   
1               1      166               64.72 2023-01-01 00:01:00   
2               2      831              759.40 2023-01-01 00:02:00   
3               3      465               48.64 2023-01-01 00:03:00   
4               4      396               96.42 2023-01-01 00:04:00   
5               5      464              314.50 2023-01-01 00:05:00   
6               6      821              709.54 2023-01-01 00:06:00   
7               7      557               76.81 2023-01-01 00:07:00   
8               8      992              186.98 2023-01-01 00:08:00   
9               9      415              844.23 2023-01-01 00:09:00   

  transaction_type merchant_category  is_fraud  
0          payment       electronics         0  
1         purchase     entertainment         0  
2          payment     entertainment         0  
3          payment     entertainm

# **Generate Encryption Key**

In [10]:
key = Fernet.generate_key()
cipher = Fernet(key)

# **Feature Engineering**

In [11]:
# Load the dataset
transaction_data = pd.read_csv('transaction_data.csv')

In [12]:
transaction_data['transaction_time'] = pd.to_datetime(transaction_data['transaction_time'])
transaction_data['transaction_hour'] = transaction_data['transaction_time'].dt.hour
transaction_data['transaction_day'] = transaction_data['transaction_time'].dt.dayofweek

# **Data Encryption**

In [13]:
transaction_data['transaction_id'] = transaction_data['transaction_id'].apply(lambda x: cipher.encrypt(str(x).encode()).decode())
transaction_data['transaction_time'] = transaction_data['transaction_time'].apply(lambda x: cipher.encrypt(str(x).encode()).decode())

with open('encryption_key.key', 'wb') as key_file:
    key_file.write(key)

# **Data Anonymization**

In [14]:
transaction_data = transaction_data.drop(columns=['user_id'])

# **Differential Privacy**

In [15]:
numerical_features = ['transaction_amount']
categorical_features = ['transaction_type', 'merchant_category']

In [16]:
def add_noise(data, epsilon=0.1):
    noise = np.random.laplace(0, 1/epsilon, data.shape)
    return data + noise

transaction_data[numerical_features] = add_noise(transaction_data[numerical_features])

# **Encode Categorical Features**

In [17]:
categorical_features = ['transaction_type', 'merchant_category']
numerical_features = ['transaction_amount']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

In [18]:
# Apply the preprocessing pipeline
X = transaction_data.drop(columns=['transaction_id', 'transaction_time', 'is_fraud'])
y = transaction_data['is_fraud']

X_preprocessed = preprocessor.fit_transform(X)

In [19]:
# Convert the preprocessed data back to a DataFrame for better visualization
X_preprocessed_df = pd.DataFrame(X_preprocessed, columns=preprocessor.get_feature_names_out())



X_preprocessed_df['transaction_hour'] = transaction_data['transaction_hour']
X_preprocessed_df['transaction_day'] = transaction_data['transaction_day']

# **Data Splitting**

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed_df, y, test_size=0.2, random_state=42)

# **Anomaly Detection using Isolation Forest Algorithm**

In [21]:
iso_forest = IsolationForest(contamination=0.05)
iso_forest.fit(X_train)

train_anomalies = iso_forest.predict(X_train)
test_anomalies = iso_forest.predict(X_test)

train_anomalies = np.where(train_anomalies == -1, 1, 0)
test_anomalies = np.where(test_anomalies == -1, 1, 0)

X_train['anomaly'] = train_anomalies
X_test['anomaly'] = test_anomalies



# **Pattern Recognition using Neural Network**

In [22]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [24]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7f34147e7580>

In [25]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

Test Accuracy: 0.9559999704360962


In [26]:
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)



# **Model Evaluation**

In [27]:
from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[1912    0]
 [  88    0]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      1912
           1       0.00      0.00      0.00        88

    accuracy                           0.96      2000
   macro avg       0.48      0.50      0.49      2000
weighted avg       0.91      0.96      0.93      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **Displaying Enrcypted Transaction Data**

In [28]:
transaction_data.head()

Unnamed: 0,transaction_id,transaction_amount,transaction_time,transaction_type,merchant_category,is_fraud,transaction_hour,transaction_day
0,gAAAAABmkpv632cZJEfJYV5P7j9zzh_4diqkO_GV7c8jBB...,871.447303,gAAAAABmkpv7jM6r9diAu47ljUkgr8wtgWhfYw-d5xeKFd...,payment,electronics,0,0,6
1,gAAAAABmkpv6LzDTCwkf_ngK6erqoUqMGnqiyYz09MYIwk...,66.424547,gAAAAABmkpv7z-PL5eYWPpoacJiZdHDuKJUDBoF-vEdaw6...,purchase,entertainment,0,0,6
2,gAAAAABmkpv68GIRMDGDHvJ7yFPvSlu7nzMUh5o6kIa1ju...,754.916352,gAAAAABmkpv7JWtNjZw4pzQwjpSdTKe7p73pjGB4-kG2eM...,payment,entertainment,0,0,6
3,gAAAAABmkpv688lzpxcTHRdQDhRyGCinN4m70y6-v3UR0L...,47.479052,gAAAAABmkpv7EXyfQNew-Az-wprQqgQGRAslRM0i_JmHqv...,payment,entertainment,0,0,6
4,gAAAAABmkpv6YmolHXZKmNRJ8U-m6ZgC_d-xz23XUUKxmf...,105.032887,gAAAAABmkpv732KTyKEacfl6eiTTUa25ngXjjgNEW-6agf...,transfer,entertainment,0,0,6


# **Decrypting the encrypted PII**

In [29]:
transaction_data['transaction_id'] = transaction_data['transaction_id'].apply(lambda x: cipher.decrypt(x.encode()).decode())
transaction_data['transaction_time'] = transaction_data['transaction_time'].apply(lambda x: cipher.decrypt(x.encode()).decode())

In [30]:
# Convert transaction_time back to datetime
transaction_data['transaction_time'] = pd.to_datetime(transaction_data['transaction_time'])

In [31]:
print(transaction_data.head())

  transaction_id  transaction_amount    transaction_time transaction_type  \
0              0          871.447303 2023-01-01 00:00:00          payment   
1              1           66.424547 2023-01-01 00:01:00         purchase   
2              2          754.916352 2023-01-01 00:02:00          payment   
3              3           47.479052 2023-01-01 00:03:00          payment   
4              4          105.032887 2023-01-01 00:04:00         transfer   

  merchant_category  is_fraud  transaction_hour  transaction_day  
0       electronics         0                 0                6  
1     entertainment         0                 0                6  
2     entertainment         0                 0                6  
3     entertainment         0                 0                6  
4     entertainment         0                 0                6  
