In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split # split data into train and test sets
from sklearn.metrics import classification_report,confusion_matrix # evaluation metrics
from tensorflow.keras.models import Sequential # ANN architecture
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten # ANN layers
from tensorflow.keras.activations import relu, sigmoid # activation functions
from tensorflow.keras.optimizers import SGD, Adam # optimizers
from tensorflow.keras.losses import binary_crossentropy # loss function
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from keras.layers import BatchNormalization


In [12]:
# Read the feature table from 'dataset.csv' and preview its final rows as a
# quick sanity check that the load worked.
df = pd.read_csv(filepath_or_buffer='dataset.csv')
df.tail()

Unnamed: 0.1,Unnamed: 0,mse,mse_lab,mse_magnitude,mse_phase,med_magnitude,med_phase,hvs1,hvs2,hvs3,ssim,ncc,if_value,histogram,class
12609,5118,272.0,3.160146,272.89975,3.71657,253.0,0.0,0.039292,0.135823,113.483824,0.955859,0.000311,136.927794,13.670006,Tampered
12610,5119,146.0,1.868469,146.0768,3.738477,254.0,0.0003988071,0.039192,0.047764,124.019408,0.973298,0.000232,73.52505,13.944947,Tampered
12611,5120,18.0,1.052999,18.178,2.130556,255.0,0.02856436,0.013885,0.084138,89.379875,0.99299,0.00031,9.585495,11.527247,Tampered
12612,5121,3.0,0.582408,3.228241,1.691034,255.0,7.105427e-15,0.017413,0.010012,124.402487,0.997959,0.000495,2.1131,12.071681,Tampered
12613,5122,203.0,0.607705,203.09995,4.224677,0.0,0.0,0.065381,0.201001,112.889401,0.945832,0.000205,102.022887,10.487417,Tampered


In [13]:
# Encode the target column as integers: 'Original' -> 0, 'Tampered' -> 1.
#
# The mapping is applied explicitly and the result is converted with
# pd.to_numeric, so the column gets a numeric dtype without depending on
# DataFrame.replace implicitly downcasting an object column (pandas has
# deprecated that implicit downcast and warns about it).
CLASS_CODES = {'Original': 0, 'Tampered': 1}

# Values that are not keys of the mapping (for example the 0/1 codes left by an
# earlier run of this cell, or NaN) pass through unchanged, so the cell can be
# re-run safely. pd.to_numeric raises ValueError if a non-numeric label that is
# missing from CLASS_CODES is still present, instead of leaving strings behind.
df['class'] = pd.to_numeric(
    df['class'].map(lambda label: CLASS_CODES.get(label, label))
)

df.tail()


  df.replace({'class':'Tampered'},1,inplace=True)


Unnamed: 0.1,Unnamed: 0,mse,mse_lab,mse_magnitude,mse_phase,med_magnitude,med_phase,hvs1,hvs2,hvs3,ssim,ncc,if_value,histogram,class
12609,5118,272.0,3.160146,272.89975,3.71657,253.0,0.0,0.039292,0.135823,113.483824,0.955859,0.000311,136.927794,13.670006,1
12610,5119,146.0,1.868469,146.0768,3.738477,254.0,0.0003988071,0.039192,0.047764,124.019408,0.973298,0.000232,73.52505,13.944947,1
12611,5120,18.0,1.052999,18.178,2.130556,255.0,0.02856436,0.013885,0.084138,89.379875,0.99299,0.00031,9.585495,11.527247,1
12612,5121,3.0,0.582408,3.228241,1.691034,255.0,7.105427e-15,0.017413,0.010012,124.402487,0.997959,0.000495,2.1131,12.071681,1
12613,5122,203.0,0.607705,203.09995,4.224677,0.0,0.0,0.065381,0.201001,112.889401,0.945832,0.000205,102.022887,10.487417,1


In [14]:
# Remove the 'Unnamed: 0' column (presumably a row index that was written into
# the CSV - TODO confirm) so it is not used as a feature.
# errors='ignore' keeps the cell re-runnable: a second execution, when the
# column is already gone, is a no-op instead of a KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

# Show column names, non-null counts and dtypes of the remaining table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12614 entries, 0 to 12613
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mse            12614 non-null  float64
 1   mse_lab        12614 non-null  float64
 2   mse_magnitude  12614 non-null  float64
 3   mse_phase      12614 non-null  float64
 4   med_magnitude  12614 non-null  float64
 5   med_phase      12614 non-null  float64
 6   hvs1           12606 non-null  float64
 7   hvs2           12614 non-null  float64
 8   hvs3           12614 non-null  float64
 9   ssim           12614 non-null  float64
 10  ncc            12614 non-null  float64
 11  if_value       12614 non-null  float64
 12  histogram      12614 non-null  float64
 13  class          12614 non-null  int64  
dtypes: float64(13), int64(1)
memory usage: 1.3 MB


In [15]:
# Clean the table, separate features from the target, and log-compress the
# feature values.

# Treat +/-inf as missing, then discard every row that has any missing value.
df = df.replace([np.inf, -np.inf], np.nan).dropna()

# X holds every column except 'class'; y is the 'class' column (the target).
X = df.drop(columns='class')
y = df['class']

# np.log1p(x) computes log(1 + x), which is only defined for x > -1: it yields
# -inf at -1 and NaN below it. Those values would be created *after* the
# cleaning step above, so fail loudly here rather than pass them downstream.
has_value_below_domain = (X <= -1).any()
below_domain = has_value_below_domain[has_value_below_domain].index.tolist()
if below_domain:
    raise ValueError(
        f"log1p is undefined for values <= -1; offending columns: {below_domain}"
    )

# Compress extremely large values by taking log(1 + x) of every feature.
X = np.log1p(X)

# Creating the Artificial Neural Network (ANN)

In [16]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Report the shape of each of the four pieces.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both the training and the test rows.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

(10083, 13) (2521, 13) (10083,) (2521,)


In [17]:


# Seed Python's random module, NumPy and TensorFlow so that weight
# initialisation, dropout masks and batch shuffling start from the same state
# on every run of this cell.
tf.keras.utils.set_random_seed(42)

# Architecture: three ReLU hidden layers (128 -> 64 -> 32 units). Each of the
# first two is followed by 50% dropout and batch normalisation. A single
# sigmoid unit produces the output.
# The input width is read from the training matrix instead of being hard-coded,
# so the cell keeps working if the number of feature columns changes.
n_features = x_train.shape[1]
model = Sequential([
    Dense(units=128, activation='relu', input_shape=(n_features,)),
    Dropout(0.5),
    BatchNormalization(),
    Dense(units=64, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Optimizer and callbacks. Both callbacks watch the validation loss:
# - ReduceLROnPlateau multiplies the learning rate by 0.2 after 5 epochs
#   without improvement, never going below 1e-4.
# - EarlyStopping halts training after 10 epochs without improvement and
#   restores the best weights seen.
optimizer = Adam(learning_rate=0.001)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Compile for binary classification and print the layer/parameter summary.
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train on the training set, holding out 20% of it for validation. The returned
# History object is kept so the per-epoch curves can be inspected afterwards.
history = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
    callbacks=[reduce_lr, early_stopping],
)

# Evaluate on the held-out test set; evaluate() returns the loss followed by
# the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy}")


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 128)               1792      
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 batch_normalization_2 (Bat  (None, 128)               512       
 chNormalization)                                                
                                                                 
 dense_5 (Dense)             (None, 64)                8256      
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 batch_normalization_3 (Bat  (None, 64)                256       
 chNormalization)                                     

In [18]:
# Score the test rows and turn the sigmoid outputs into hard 0/1 labels by
# thresholding at 0.5.
y_pred = (model.predict(x_test) > 0.5).astype(int)

# Summarise the predictions against the true test labels.
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Show both summaries, each under its own heading.
print("Confusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")

Confusion Matrix:
[[1217  281]
 [ 324  699]]

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.81      0.80      1498
           1       0.71      0.68      0.70      1023

    accuracy                           0.76      2521
   macro avg       0.75      0.75      0.75      2521
weighted avg       0.76      0.76      0.76      2521



In [19]:
# Score the trained model on the held-out test rows. evaluate() returns the
# loss first, then the accuracy metric the model was compiled with.
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.7600158452987671


In [20]:
# Write the trained network to 'model.h5' in the current working directory.
# NOTE(review): only the Keras model is written here. The log1p transform and
# the fitted StandardScaler applied to the inputs are not persisted - confirm
# whether whatever loads this file needs them.
model.save(filepath='model.h5')

  saving_api.save_model(
