In [13]:
from pathlib import Path
import pandas as pd
import numpy as np
import pgeocode
import joblib
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [14]:
# Loading data
model_df = pd.read_csv('data/final_merged_100users.csv')

In [16]:
# Displaying the model
model_df.head()

Unnamed: 0,Amount,Use Chip,Merchant State,Errors?,Is Fraud?,Per Capita Income - Zipcode,Yearly Income - Person,Total Debt,FICO Score,Num Credit Cards,Has Chip,Cards Issued,International,Online,Age_at_transaction,income_to_debt,day_of_week,timestamp,time_of_day,distances
0,134.09,Swipe Transaction,CA,No Error,0,29278,59696,127613,787,5,1,2,0,0,36,0.467789,6,1030861000.0,Morning,0.0
1,38.48,Swipe Transaction,CA,No Error,0,29278,59696,127613,787,5,1,2,0,0,36,0.467789,6,1030863000.0,Morning,33.540588
2,120.34,Swipe Transaction,CA,No Error,0,29278,59696,127613,787,5,1,2,0,0,36,0.467789,0,1030948000.0,Morning,33.540588
3,128.95,Swipe Transaction,CA,No Error,0,29278,59696,127613,787,5,1,2,0,0,36,0.467789,0,1030989000.0,Afternoon,33.540588
4,104.71,Swipe Transaction,CA,No Error,0,29278,59696,127613,787,5,1,2,0,0,36,0.467789,1,1031034000.0,Morning,0.0


In [17]:
# Convert categorical data to numeric with `pd.get_dummies`
dummies = pd.get_dummies(model_df)
dummies.head()

Unnamed: 0,Amount,Is Fraud?,Per Capita Income - Zipcode,Yearly Income - Person,Total Debt,FICO Score,Num Credit Cards,Has Chip,Cards Issued,International,...,"Errors?_Bad PIN,Technical Glitch",Errors?_Bad Zipcode,Errors?_Insufficient Balance,"Errors?_Insufficient Balance,Technical Glitch",Errors?_No Error,Errors?_Technical Glitch,time_of_day_Afternoon,time_of_day_Evening,time_of_day_Morning,time_of_day_Night
0,134.09,0,29278,59696,127613,787,5,1,2,0,...,False,False,False,False,True,False,False,False,True,False
1,38.48,0,29278,59696,127613,787,5,1,2,0,...,False,False,False,False,True,False,False,False,True,False
2,120.34,0,29278,59696,127613,787,5,1,2,0,...,False,False,False,False,True,False,False,False,True,False
3,128.95,0,29278,59696,127613,787,5,1,2,0,...,False,False,False,False,True,False,True,False,False,False
4,104.71,0,29278,59696,127613,787,5,1,2,0,...,False,False,False,False,True,False,False,False,True,False


In [20]:
model_df['Merchant State'].values

array(['CA', 'CA', 'CA', ..., 'Online', 'HI', 'Online'], dtype=object)

In [19]:
model_df['Is Fraud?'].values

array([0, 0, 0, ..., 0, 0, 0])

In [21]:
# Splitting  preprocessed data 
y = dummies['Is Fraud?'].values
X = dummies.drop(columns='Is Fraud?').values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [22]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# Defining the model
features_total = len(X_train[0])
nodes_layer1 = 60
nodes_layer2 = 30
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = nodes_layer1, input_dim = features_total, activation = 'relu'))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = nodes_layer2, activation = 'relu'))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
# Compiling the model
nn.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [25]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9987 - loss: 0.0262
Epoch 2/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9988 - loss: 0.0065
Epoch 3/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9989 - loss: 0.0055
Epoch 4/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9989 - loss: 0.0049
Epoch 5/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9989 - loss: 0.0059
Epoch 6/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9991 - loss: 0.0045
Epoch 7/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9991 - loss: 0.0044
Epoch 8/50
[1m2272/2272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9990 - loss: 0.0039
Epoch 9/50
[1m2272/2

In [26]:
# Evaluating the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

758/758 - 1s - 2ms/step - accuracy: 0.9989 - loss: 0.0051
Loss: 0.005073701962828636, Accuracy: 0.9989271759986877


In [29]:
# Exporting the model to HDF5 file
nn.save('Models/nnmodel_aleks_test.h5')



In [30]:
# Loading the model
loaded_model_1 = tf.keras.models.load_model("Models/nnmodel_aleks_test.h5")

# Checking the structure
loaded_model_1.summary()



In [32]:
# Load the model
loaded_model = joblib.load(model_filename)
# Saving the model with joblib
model_filename = 'models/nnmodel_aleks_joblib_test.joblib'
joblib.dump(loaded_model, model_filename) 
print(f"Model saved to {model_filename}")

# Load the model
loaded_model = joblib.load(model_filename)
print("Model loaded successfully")

# Loading the data
#data_path = 'data/transactions_users_100..csv'

Model saved to models/nnmodel_aleks_joblib_test.joblib
Model loaded successfully


In [39]:
X_test.dtype

dtype('O')

In [40]:
array = X_test.astype(np.float32)

In [41]:
array

array([[4.9830e+01, 2.2681e+04, 3.3483e+04, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [7.1800e+00, 2.2681e+04, 3.3483e+04, ..., 0.0000e+00, 1.0000e+00,
        0.0000e+00],
       [1.7689e+02, 2.9278e+04, 5.9696e+04, ..., 0.0000e+00, 1.0000e+00,
        0.0000e+00],
       ...,
       [3.3850e+01, 2.9278e+04, 5.9696e+04, ..., 0.0000e+00, 1.0000e+00,
        0.0000e+00],
       [4.5620e+01, 3.7891e+04, 7.7254e+04, ..., 1.0000e+00, 0.0000e+00,
        0.0000e+00],
       [4.3710e+01, 2.2681e+04, 3.3483e+04, ..., 0.0000e+00, 0.0000e+00,
        1.0000e+00]], dtype=float32)

In [42]:
X_train

array([[99.04, 53797, 109687, ..., False, False, False],
       [32.51, 22681, 33483, ..., True, False, False],
       [10.06, 22681, 33483, ..., False, True, False],
       ...,
       [162.76, 53797, 109687, ..., False, True, False],
       [9.41, 22681, 33483, ..., False, True, False],
       [120.02, 53797, 109687, ..., True, False, False]], dtype=object)

In [43]:
# Predicting on the training set
y_pred_loaded = loaded_model.predict(array)

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_9" is incompatible with the layer: expected axis -1 of input shape to have value 169, but received input with shape (32, 129)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 129), dtype=float32)
  • training=False
  • mask=None

In [None]:
# Evaluation
print('\nClassification Report (Neural Network):')
print(classification_report(y_test, y_pred_loaded))
print('Confusion Matrix (Neural Network):')
print(confusion_matrix(y_test, y_pred_loaded))
print('Accuracy Score (Neural Network):')
print(accuracy_score(y_test, y_pred_loaded))

In [None]:
# Generating the confusion matrix
cm = confusion_matrix(y_test, y_test_pred)

# Displaying the confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()

plt.show()