<a href="https://colab.research.google.com/github/xclusive-3G/-Credit-Card-detection-using-ANN/blob/main/Fraud%20Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
import keras


In [None]:
# Read the credit-card transactions CSV into a pandas DataFrame.
csv_path = 'content/creditcard.csv'
credit_card_data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the dataset.
credit_card_data.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


In [None]:
# Dataset information: index range, column names, non-null counts and dtypes.
credit_card_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17918 entries, 0 to 17917
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    17918 non-null  int64  
 1   V1      17918 non-null  float64
 2   V2      17918 non-null  float64
 3   V3      17918 non-null  float64
 4   V4      17918 non-null  float64
 5   V5      17917 non-null  float64
 6   V6      17917 non-null  float64
 7   V7      17917 non-null  float64
 8   V8      17917 non-null  float64
 9   V9      17917 non-null  float64
 10  V10     17917 non-null  float64
 11  V11     17917 non-null  float64
 12  V12     17917 non-null  float64
 13  V13     17917 non-null  float64
 14  V14     17917 non-null  float64
 15  V15     17917 non-null  float64
 16  V16     17917 non-null  float64
 17  V17     17917 non-null  float64
 18  V18     17917 non-null  float64
 19  V19     17917 non-null  float64
 20  V20     17917 non-null  float64
 21  V21     17917 non-null  float64
 22

In [None]:
# Count the missing (NaN) entries in every column.
credit_card_data.isna().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        1
V6        1
V7        1
V8        1
V9        1
V10       1
V11       1
V12       1
V13       1
V14       1
V15       1
V16       1
V17       1
V18       1
V19       1
V20       1
V21       1
V22       1
V23       1
V24       1
V25       1
V26       1
V27       1
V28       1
Amount    1
Class     1
dtype: int64

In [None]:
# How many legit (Class 0) vs. fraudulent (Class 1) transactions there are.
class_labels = credit_card_data['Class']
class_labels.value_counts()

Class
0.0    17836
1.0       81
Name: count, dtype: int64

In [None]:
# Split the rows by label: Class 0 -> legit, Class 1 -> fraud.
is_legit = credit_card_data['Class'] == 0
is_fraud = credit_card_data['Class'] == 1
legit = credit_card_data[is_legit]
fraud = credit_card_data[is_fraud]

In [None]:
# Summary statistics of the transaction amount for legit transactions.
legit['Amount'].describe()

count    17836.000000
mean        67.365025
std        188.754429
min          0.000000
25%          5.490000
50%         15.950000
75%         56.232500
max       7712.430000
Name: Amount, dtype: float64

In [None]:
# Summary statistics of the transaction amount for fraudulent transactions.
fraud['Amount'].describe()

count      81.000000
mean       98.105926
std       267.464067
min         0.000000
25%         1.000000
50%         1.000000
75%        99.990000
max      1809.680000
Name: Amount, dtype: float64

In [None]:
# Per-class mean of every feature, to compare legit and fraudulent transactions.
credit_card_data.groupby(by='Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,13891.13198,-0.203851,0.22856,0.840631,0.263581,-0.117213,0.112151,-0.110725,-0.008454,0.752065,...,0.032646,-0.05493,-0.144997,-0.036871,0.015838,0.118904,0.035813,0.010352,0.00667,67.365025
1.0,16833.074074,-9.315066,6.779465,-13.05287,6.451318,-6.557541,-2.602613,-9.012856,4.730365,-3.087651,...,0.756424,0.549752,-0.423495,-0.398104,-0.277836,0.355688,0.177616,0.940693,0.090007,98.105926


In [None]:
# Under-sample the legit class: draw 146 legit transactions at random.
# A fixed random_state makes the draw (and everything derived from it)
# reproducible across kernel restarts.
legit_sample = legit.sample(n=146, random_state=0)

In [None]:
# Stack the sampled legit rows on top of all fraud rows (row-wise concat).
new_dataset = pd.concat([legit_sample, fraud], axis='index')

In [None]:
# Preview the first five rows of the under-sampled dataset.
new_dataset.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
16225,27628,1.238407,0.294685,0.187101,0.498792,-0.153583,-0.55847,-0.026919,-0.033016,-0.198571,...,-0.258767,-0.781571,0.076787,-0.043547,0.229574,0.097867,-0.027422,0.017041,0.99,0.0
13679,24243,1.231874,0.040458,-1.04043,0.483041,2.243337,3.66031,-0.508166,0.763977,1.455313,...,-0.323113,-0.676479,-0.130577,0.949767,0.863527,-0.367132,-0.004768,0.009154,26.26,0.0
5043,4654,-1.346942,-0.198847,2.570935,-0.651883,-0.160021,1.809262,0.339693,0.553286,1.570715,...,-0.069429,0.054124,0.379468,-0.618573,-0.135838,0.929585,-0.030397,0.055164,181.37,0.0
2722,2265,-1.266171,-0.321459,1.784605,-2.404879,-1.251838,-0.348121,-0.675538,0.489034,-2.540517,...,0.085012,0.446617,-0.090068,0.09556,0.43489,-0.147216,0.196593,0.023946,69.0,0.0
8495,11391,1.960497,0.966209,-4.22804,1.504006,2.374975,2.625682,-1.076097,1.084228,1.237211,...,-0.179448,-0.2862,0.677955,0.676118,-1.875719,0.302978,-0.168063,-0.244677,1.0,0.0


In [None]:
# Class balance after under-sampling.
sampled_labels = new_dataset['Class']
sampled_labels.value_counts()

Class
0.0    146
1.0     81
Name: count, dtype: int64

In [None]:
# Per-class feature means of the under-sampled dataset.
new_dataset.groupby(by='Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,13830.917808,-0.083495,0.175751,0.692957,0.194972,0.063897,0.313255,-0.105546,0.068585,0.759075,...,0.026922,-0.166645,-0.22124,0.012117,0.096995,0.096948,-0.027384,0.020964,-0.00456,55.979315
1.0,16833.074074,-9.315066,6.779465,-13.05287,6.451318,-6.557541,-2.602613,-9.012856,4.730365,-3.087651,...,0.756424,0.549752,-0.423495,-0.398104,-0.277836,0.355688,0.177616,0.940693,0.090007,98.105926


In [None]:
# Standardise the transaction amount with StandardScaler and store it as
# 'normalizedAmount', then discard the raw 'Amount' and 'Time' columns.
amount_column = new_dataset[['Amount']].to_numpy()  # 2-D (n_rows, 1) array, as the scaler expects
new_dataset['normalizedAmount'] = StandardScaler().fit_transform(amount_column)
new_dataset = new_dataset.drop(columns=['Amount', 'Time'])



In [None]:
# Hold out 30% of the rows for evaluation.
# Rows with missing values are dropped first: the dataset contains an entry
# whose features and label are NaN, which cannot be used for training.
# The split is stratified on 'Class' because fraud cases are very rare, so an
# unstratified split can leave either side with almost no positives.
# NOTE(review): this trains on the full credit_card_data; the under-sampled,
# normalised `new_dataset` built above is never used — confirm which one is
# intended (switching changes the number of feature columns).
labelled_data = credit_card_data.dropna()
X_train, X_test, y_train, y_test = train_test_split(
    labelled_data.drop(columns=['Class']),
    labelled_data['Class'],
    test_size=0.3,
    random_state=0,
    stratify=labelled_data['Class'],
)

In [None]:
# Fully connected binary classifier: two ReLU hidden layers (128 and 64
# units) feeding a single output unit.
# The output uses a sigmoid so it yields a probability in (0, 1). A softmax
# over a single unit normalises to a constant 1.0 for every input, which
# makes both the binary cross-entropy loss and the 0.5 decision threshold
# meaningless (every sample would be predicted as fraud).
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, and
# accuracy reported as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Stop training once validation loss has not improved for 10 epochs, and roll
# the model back to the weights of its best epoch; without
# restore_best_weights the model would keep the weights from the final,
# non-improving epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Also persist the best model (lowest validation loss) to disk.
model_checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)


In [None]:
# Train for up to 100 epochs in mini-batches of 32, with early stopping and
# best-model checkpointing driven by the validation loss.
# NOTE(review): the held-out test split doubles as the validation set here,
# so early stopping and checkpoint selection see the test data.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100


<keras.src.callbacks.History at 0x7b7d1e9c1840>

In [None]:
# Evaluate loss and metrics on the held-out test split.
score = model.evaluate(x=X_test, y=y_test)



In [None]:
# evaluate() returned [loss, accuracy]; report the accuracy entry.
test_accuracy = score[1]
print("Accuracy: ", test_accuracy)

Accuracy:  0.0048363097012043


In [None]:
# Number of labels in the held-out test split.
print(y_test.shape)

(5376,)


In [None]:
# Evaluate loss and metrics on the training split, for comparison with the test score.
tscore = model.evaluate(x=X_train, y=y_train)



In [None]:
# evaluate() returned [loss, accuracy]; report the training accuracy entry.
train_accuracy = tscore[1]
print("Accuracy: ", train_accuracy)

Accuracy:  0.9969865679740906


In [None]:
# Interactively collect one transaction's details from the user.
raw_amount = input("Enter the transaction amount: ")
amount = float(raw_amount)
raw_date = input("Enter the transaction date (ddmmyyyy): ")
date = int(raw_date)  # parsed as a plain integer, so a leading zero in the day is lost
raw_user_id = input("Enter the user ID: ")
user_id = int(raw_user_id)


In [None]:
# Pack the three user-supplied values into a single-row 2-D array.
input_data = np.array([amount, date, user_id]).reshape(1, -1)

In [None]:
# Show the raw single-row input array.
print(input_data)

In [None]:
# Make a float64 copy of the input row and confirm its container type.
input_array = np.array(input_data, dtype=np.float64)
print(type(input_array))



<class 'numpy.ndarray'>


In [None]:
# Show the float-typed copy of the input row.
print(input_array)

[[   67677. 23032005. 66554456.]]


In [None]:
#my_list = input_data

# create a new numpy array with 28 elements my_array = np.zeros(28, dtype=int)


In [None]:
# fill the array with integers from 0 to 27
#my_array[:] = np.arange(30)

In [None]:
# Show the input row again: at this point it still holds only the three
# user-supplied values (it is zero-padded in a later cell).
print(input_data)

[[   67677. 23032005. 66554456.]]


In [None]:
#my_array = np.expand_dims(my_array, axis=0)
#print(my_array)

In [None]:
# Zero-pad the single input row on the right so it has one value per feature
# column the model was trained on. The pad width is derived from X_train
# rather than hard-coded, so re-running this cell is a no-op once the row
# already has the full width (a fixed width would keep growing the row).
# NOTE(review): the three user-supplied values stay in the first three
# positions, which line up with the first three columns of X_train rather
# than with an amount/date/user-id feature — confirm this mapping is intended.
n_missing_features = X_train.shape[1] - input_data.shape[1]
input_data = np.pad(input_data, [(0, 0), (0, n_missing_features)], mode='constant')

In [None]:
# Show the zero-padded input row.
print(input_data)

[[   67677. 23032005. 66554456.        0.        0.        0.        0.
         0.        0.        0.        0.        0.        0.        0.
         0.        0.        0.        0.        0.        0.        0.
         0.        0.        0.        0.        0.        0.        0.
         0.        0.]]


In [None]:
# Check which container type the test features use.
print(type(X_test))

<class 'pandas.core.frame.DataFrame'>


In [None]:
# Wrap the padded row in a DataFrame that carries the same column labels as
# the training features, so every column has a unique name that matches what
# the model was fitted on (and the column count is checked against X_train).
df = pd.DataFrame(input_data, columns=X_train.columns)

In [None]:
# Show the single-row input frame.
print(df)

         A           B           C    A    B    C    A    B    C    A  ...  \
0  67677.0  23032005.0  66554456.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...   

     C    A    B    C    A    B    C    A    B    C  
0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  

[1 rows x 30 columns]


In [None]:
# Run the trained model on the single-row input frame.
prediction = model.predict(x=df)



In [None]:
# Label the transaction by thresholding the model's output at 0.5.
fraud_score = prediction[0][0]
verdict = "Fraudulent transaction" if fraud_score > 0.5 else "Legit transaction"
print(verdict)


Legit transaction
