Importing Libraries and Dataset

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import warnings
warnings.filterwarnings("ignore")

In [3]:
df=pd.read_csv("Fraud.csv")
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [4]:
df.isFraud.value_counts()

isFraud
0    6354407
1       8213
Name: count, dtype: int64

In [5]:
data = df.copy()

In [6]:
data.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [7]:
data.duplicated().sum()

0

In [8]:
dummies=pd.get_dummies(data['type'],drop_first=True)
dummies

Unnamed: 0,CASH_OUT,DEBIT,PAYMENT,TRANSFER
0,False,False,True,False
1,False,False,True,False
2,False,False,False,True
3,True,False,False,False
4,False,False,True,False
...,...,...,...,...
6362615,True,False,False,False
6362616,False,False,False,True
6362617,True,False,False,False
6362618,False,False,False,True


In [9]:
data=pd.concat((data,dummies),axis=1)
data.drop(['type','nameOrig', 'nameDest', 'isFlaggedFraud'], axis=1, inplace=True)
data

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,CASH_OUT,DEBIT,PAYMENT,TRANSFER
0,1,9839.64,170136.00,160296.36,0.00,0.00,0,False,False,True,False
1,1,1864.28,21249.00,19384.72,0.00,0.00,0,False,False,True,False
2,1,181.00,181.00,0.00,0.00,0.00,1,False,False,False,True
3,1,181.00,181.00,0.00,21182.00,0.00,1,True,False,False,False
4,1,11668.14,41554.00,29885.86,0.00,0.00,0,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,339682.13,339682.13,0.00,0.00,339682.13,1,True,False,False,False
6362616,743,6311409.28,6311409.28,0.00,0.00,0.00,1,False,False,False,True
6362617,743,6311409.28,6311409.28,0.00,68488.84,6379898.11,1,True,False,False,False
6362618,743,850002.52,850002.52,0.00,0.00,0.00,1,False,False,False,True


In [10]:
X = data.drop(['isFraud'], axis=1)
y = data['isFraud']

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [11]:
sm = SMOTE(random_state=42)
X, y = sm.fit_resample(X, y)

In [12]:
y.value_counts()

isFraud
0    6354407
1    6354407
Name: count, dtype: int64

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [14]:
y_train.value_counts()

isFraud
1    4448085
0    4448084
Name: count, dtype: int64

In [15]:
y_test.value_counts()

isFraud
0    1906323
1    1906322
Name: count, dtype: int64

In [16]:
X_train

array([[-0.71942553, -0.29606306, -0.28871645, ..., -0.08095965,
         1.39903602, -0.30234516],
       [ 1.89227697, -0.06062801, -0.2391184 , ..., -0.08095965,
        -0.71477788,  3.30747816],
       [-1.41194072, -0.16064734, -0.26002991, ..., -0.08095965,
        -0.71477788,  3.30747816],
       ...,
       [ 3.14267339,  0.15116328, -0.19483822, ..., -0.08095965,
        -0.71477788,  3.30747816],
       [-0.42434072, -0.29053302, -0.26794049, ..., -0.08095965,
        -0.71477788, -0.30234516],
       [ 2.20241996, -0.17381362, -0.26278264, ..., -0.08095965,
        -0.71477788,  3.30747816]])

In [17]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1, activation='sigmoid'))

In [18]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [19]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5)

Epoch 1/5
[1m278006/278006[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3375s[0m 12ms/step - accuracy: 0.9541 - loss: 0.1088 - val_accuracy: 0.9830 - val_loss: 0.0488
Epoch 2/5
[1m278006/278006[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2673s[0m 10ms/step - accuracy: 0.9743 - loss: 0.0694 - val_accuracy: 0.9853 - val_loss: 0.0454
Epoch 3/5
[1m278006/278006[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2995s[0m 11ms/step - accuracy: 0.9775 - loss: 0.0627 - val_accuracy: 0.9879 - val_loss: 0.0372
Epoch 4/5
[1m278006/278006[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1516s[0m 5ms/step - accuracy: 0.9799 - loss: 0.0574 - val_accuracy: 0.9786 - val_loss: 0.0511
Epoch 5/5
[1m278006/278006[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m712s[0m 3ms/step - accuracy: 0.9809 - loss: 0.0551 - val_accuracy: 0.9884 - val_loss: 0.0383


<keras.src.callbacks.history.History at 0x1f0638a43e0>

In [20]:
y_pred=model.predict(X_test)

[1m119146/119146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 1ms/step


In [21]:
y_pred = (y_pred > 0.5).astype("int32")

In [22]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99   1906323
           1       0.99      0.99      0.99   1906322

    accuracy                           0.99   3812645
   macro avg       0.99      0.99      0.99   3812645
weighted avg       0.99      0.99      0.99   3812645



In [23]:
print(confusion_matrix(y_test, y_pred))

[[1878556   27767]
 [  16275 1890047]]
