In [None]:
import tensorflow as tf

# Enable memory growth on every GPU that TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)


### Import necessary libraries

In [None]:
import io
import itertools
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout, Activation, GlobalAveragePooling2D
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from PIL import Image, ImageChops, ImageEnhance
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

%matplotlib inline

### Error Level Analysis

In [None]:
def convert_to_ela_image(path, quality, save_path="ela_image.png"):
    """Return the Error Level Analysis (ELA) rendering of an image.

    The image is re-compressed as JPEG at ``quality``, the per-pixel absolute
    difference between the original and the re-compressed copy is taken, and
    the result is brightened so that small differences become visible.

    Args:
        path: Location of the input image (anything ``PIL.Image.open`` accepts).
        quality: JPEG quality used for the re-compression step.
        save_path: Where to write a copy of the ELA image. Defaults to
            ``"ela_image.png"`` (the previous, unconditional behaviour);
            pass ``None`` to skip writing to disk.

    Returns:
        PIL.Image.Image: The brightened RGB difference image.
    """
    # Close the source file as soon as the RGB copy exists.
    with Image.open(path) as source_image:
        original_image = source_image.convert('RGB')

    # Re-compress in memory instead of through a fixed-name temporary file:
    # no disk round-trip per image and no clash between concurrent runs.
    buffer = io.BytesIO()
    original_image.save(buffer, 'JPEG', quality=quality)
    buffer.seek(0)
    with Image.open(buffer) as resaved_image:
        # Pixel difference between the original and the re-compressed image.
        ela_image = ImageChops.difference(original_image, resaved_image)

    # getextrema() yields one (min, max) pair per band; scale by the largest
    # maximum, falling back to 1 so an all-zero difference does not divide by 0.
    extrema = ela_image.getextrema()
    max_difference = max(band_max for _, band_max in extrema) or 1
    scale = 350.0 / max_difference

    # Brighten the difference image so the error levels are visible.
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)

    if save_path is not None:
        ela_image.save(save_path)
    return ela_image

### Dataset Preparation

In [None]:
def prepare_image(image_path):
    """Turn an image file into a flat, normalised ELA feature vector.

    The image is converted with ``convert_to_ela_image`` at JPEG quality 90,
    resized to 128x128, flattened to one dimension and divided by 255.0.

    Args:
        image_path: Path of the image to preprocess.

    Returns:
        numpy.ndarray: 1-D float array of the scaled pixel values.
    """
    image_size = (128, 128)
    ela_resized = convert_to_ela_image(image_path, 90).resize(image_size)
    pixel_values = np.array(ela_resized).flatten()
    # Normalise the pixel values obtained from the ELA image.
    return pixel_values / 255.0

In [None]:
# X collects the ELA-converted images; Y collects the labels (0 for fake, 1 for real).
X, Y = [], []

In [None]:
#adding authentic images
import os
from tqdm import tqdm

# Start from empty containers so re-running this cell never duplicates samples.
X, Y = [], []

# Folders holding the authentic images of CASIA1 and CASIA2.
authentic_paths = [
    r'C:\Users\udhay\OneDrive\Documents\archive\CASIA1\Au',  # Authentic images from CASIA1
    r'C:\Users\udhay\OneDrive\Documents\archive\CASIA2\Au'   # Authentic images from CASIA2
]

# Preprocess every jpg/png file in each folder and label it 1 (authentic).
for path in authentic_paths:
    progress = tqdm(os.listdir(path), desc=f"Processing Authentic Images in {path}")
    for filename in progress:
        if not filename.endswith(('jpg', 'png')):
            continue  # skip files with other extensions
        full_path = os.path.join(path, filename)
        X.append(prepare_image(full_path))
        Y.append(1)

print(f'Total authentic images: {len(X)}\nTotal labels: {len(Y)}')





In [None]:
#adding forged images

import os
from tqdm import tqdm

# X and Y are deliberately NOT re-initialised here: resetting them would throw
# away the authentic images loaded by the authentic-images cell and leave a
# dataset that contains only the forged class. Run the authentic-images cell
# first; re-running this cell on its own appends the forged images again.

# Folders holding the forged images of CASIA1 and CASIA2.
forged_paths = [
    r'C:\Users\udhay\OneDrive\Documents\archive\CASIA1\Sp',  # Forged images from CASIA1
    r'C:\Users\udhay\OneDrive\Documents\archive\CASIA2\Tp'   # Forged images from CASIA2
]

# Preprocess every jpg/png file in each folder and label it 0 (forged).
forged_count = 0
for path in forged_paths:
    for filename in tqdm(os.listdir(path), desc=f"Processing Forged Images in {path}"):
        if filename.endswith(('jpg', 'png')):
            full_path = os.path.join(path, filename)
            X.append(prepare_image(full_path))  # Preprocess the image
            Y.append(0)  # Label for forged images (0)
            forged_count += 1

# Report the forged count separately from the running totals, which now
# include the authentic images as well.
print(f'Total forged images: {forged_count}\nTotal images: {len(X)}\nTotal labels: {len(Y)}')


In [None]:
# Step 1: Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import os

# Step 2: Define your prepare_image function (if you haven't already)

# Step 3: Load your images and labels into X and Y
X = []  # image data
Y = []  # labels: 1 = authentic, 0 = forged

path_authentic = r'C:\Users\udhay\OneDrive\Documents\archive\CASIA1\Au'  # Path for authentic images
path_forged = r'C:\Users\udhay\OneDrive\Documents\archive\CASIA1\Sp'  # Path for forged images

# Walk both folders with one loop: (folder, label, progress-bar description).
for folder, label, description in (
    (path_authentic, 1, "Processing Authentic Images:"),
    (path_forged, 0, "Processing Forged Images:"),
):
    for filename in tqdm(os.listdir(folder), desc=description):
        if not filename.endswith(('jpg', 'png')):
            continue  # only jpg/png files are used
        full_path = os.path.join(folder, filename)
        X.append(prepare_image(full_path))
        Y.append(label)

# Convert to NumPy arrays; each flat feature vector becomes a 128x128x3 image.
X = np.array(X).reshape(-1, 128, 128, 3)
Y = np.array(Y)

# Hold out 5% for testing, then 20% of the remainder for validation.
X_temp, X_test, Y_temp, Y_test = train_test_split(X, Y, test_size=0.05, random_state=5)
X_train, X_val, Y_train, Y_val = train_test_split(X_temp, Y_temp, test_size=0.2, random_state=5)

# Report the size of every partition.
print(f'Training images: {len(X_train)}, Training labels: {len(Y_train)}')
print(f'Validation images: {len(X_val)}, Validation labels: {len(Y_val)}')
print(f'Test images: {len(X_test)}, Test labels: {len(Y_test)}')

# Step 7: Continue with model training and evaluation...


### Partitioning dataset for training, validation and testing

In [None]:
from sklearn.model_selection import train_test_split

# X and Y must already hold the image data and labels.
# They may still be plain Python lists (e.g. when only the list-building cells
# were run), in which case train_test_split would return lists and the
# .reshape calls below would fail. np.asarray is a no-op for existing arrays.
X_all = np.asarray(X)
Y_all = np.asarray(Y)

# Split the data into training + validation and testing sets
X_temp, X_test, Y_temp, Y_test = train_test_split(X_all, Y_all, test_size=0.05, random_state=5)

# Further split the training + validation into training and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X_temp, Y_temp, test_size=0.2, random_state=5)

# Reshape the data for the model: (samples, height, width, channels) with
# 128x128 RGB images. Flat feature vectors are folded back into images; data
# that already has this shape is left unchanged.
X_train = X_train.reshape(-1, 128, 128, 3)
X_val = X_val.reshape(-1, 128, 128, 3)
X_test = X_test.reshape(-1, 128, 128, 3)

print(f'Training images: {len(X_train)}, Training labels: {len(Y_train)}')
print(f'Validation images: {len(X_val)}, Validation labels: {len(Y_val)}')
print(f'Test images: {len(X_test)}, Test labels: {len(Y_test)}')


### CNN Model

In [None]:
from tensorflow.keras.layers import Input

def build_model():
    """Assemble the CNN used to classify 128x128 RGB ELA images.

    Architecture: three stages of two 5x5 'valid' ReLU convolutions (64
    filters each) followed by 2x2 max-pooling, then one 32-filter 5x5
    convolution with 2x2 max-pooling, global average pooling and a single
    sigmoid output unit.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    def conv(filters):
        # Every convolution shares the same kernel, padding and activation.
        return Conv2D(filters=filters, kernel_size=(5, 5), padding='valid', activation='relu')

    layers = [Input(shape=(128, 128, 3))]

    # Three identical conv-conv-pool stages.
    for _ in range(3):
        layers.extend([conv(64), conv(64), MaxPool2D(pool_size=(2, 2))])

    # Final narrower stage and the classification head.
    layers.extend([
        conv(32),
        MaxPool2D(pool_size=(2, 2)),
        GlobalAveragePooling2D(),
        Dense(1, activation='sigmoid'),
    ])
    return Sequential(layers)


In [None]:
# Build the CNN defined by build_model() and print its layer-by-layer summary.
model = build_model()
model.summary()

### Model Training

In [None]:
# Training hyperparameters: number of passes over the data and samples per batch.
epochs, batch_size = 15, 32


In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Number of training epochs (same value as set in the hyperparameter cell above)
epochs = 15

# Learning rate schedule: starts at init_lr and is multiplied by decay_rate
# once every decay_steps optimizer steps (staircase=True makes the decay
# happen in discrete jumps rather than continuously).
# NOTE(review): with decay_steps=100000 the rate presumably never changes
# during a 15-epoch run on this dataset -- confirm this is intended.
init_lr = 1e-4  # initial learning rate for the optimizer
lr_schedule = ExponentialDecay(
    initial_learning_rate=init_lr,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

# Adam optimizer driven by the schedule above
optimizer = Adam(learning_rate=lr_schedule)

# Binary classification: a single sigmoid output trained with binary
# cross-entropy, tracking accuracy as the reported metric
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Early stopping: end training once validation accuracy has failed to improve
# for `patience` consecutive epochs. min_delta=0 counts any increase as an
# improvement; mode='auto' lets Keras infer the direction from the metric name.
# NOTE(review): restore_best_weights is not set, so the model presumably keeps
# the weights of the last epoch run rather than the best one -- confirm.
early_stopping = EarlyStopping(monitor = 'val_accuracy',
                               min_delta = 0,
                               patience = 10,
                               verbose = 0,
                               mode = 'auto')

In [None]:
# Train on the training split, evaluating on the validation split after each
# epoch; `hist` keeps the per-epoch metrics used by the saving/plotting cells.
hist = model.fit(X_train,
                 Y_train,
                 batch_size = batch_size,
                 epochs = epochs,
                 validation_data = (X_val, Y_val),
                 callbacks = [early_stopping])

In [None]:
# Save the trained model as a .h5 file
model.save('model.h5')  # Specify a filename

# Per-epoch loss and metric values recorded during training.
# Every value is cast to a built-in float: Keras can report NumPy scalars
# (e.g. float32), which json.dump rejects with "not JSON serializable".
history_dict = {
    metric: [float(value) for value in values]
    for metric, values in hist.history.items()
}

# Save the history as a JSON file
with open('history.json', 'w') as f:  # Specify a filename for the JSON file
    json.dump(history_dict, f)


### Plotting the training and validation curves

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# One row per panel: (axes, training key, validation key,
#                     training legend, validation legend, y-axis label).
panels = [
    (ax[0], 'loss', 'val_loss', "Training Loss", "Validation Loss", 'Loss'),
    (ax[1], 'accuracy', 'val_accuracy', "Training Accuracy", "Validation Accuracy", 'Accuracy'),
]

# Training curve in blue, validation curve in red, identical styling per panel.
for axis, train_key, val_key, train_label, val_label, y_label in panels:
    axis.plot(history_dict[train_key], color='b', label=train_label)
    axis.plot(history_dict[val_key], color='r', label=val_label)
    axis.set_xlabel('Epochs', fontsize=16)
    axis.set_ylabel(y_label, fontsize=16)
    axis.legend(loc='best', shadow=True)
    axis.grid(True)

fig.suptitle('Training and Validation Metrics', fontsize=20)
plt.tight_layout()
plt.show()


### Confusion Matrix

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cf_matrix):
    """Draw a 2x2 confusion matrix as an annotated heatmap.

    Every cell is annotated with its raw count and, on a second line, its
    share of all samples as a percentage.

    Args:
        cf_matrix: 2x2 NumPy array of counts; rows are plotted as true labels
            and columns as predicted labels, in the order Forged, Authentic.
    """
    axes_labels = ['Forged', 'Authentic']
    total = np.sum(cf_matrix)

    # "count\npercentage" annotation for each cell, in row-major order.
    annotations = []
    for count in cf_matrix.flatten():
        annotations.append(f"{count:0.0f}\n{count / total:.2%}")
    labels = np.asarray(annotations).reshape(2, 2)

    # fmt='' because the annotations are already formatted strings.
    sns.heatmap(
        cf_matrix,
        annot=labels,
        fmt='',
        cmap="flare",
        xticklabels=axes_labels,
        yticklabels=axes_labels,
    )

    plt.xlabel('Predicted labels', fontsize=13)
    plt.ylabel('True labels', fontsize=13)
    plt.title('Confusion Matrix', fontsize=10, fontweight='bold')
    plt.show()

In [None]:
# Predict the values from the validation dataset
Y_pred = model.predict(X_val)

# Round the sigmoid outputs to hard 0/1 labels and flatten the (N, 1) float
# predictions to a 1-D integer array so they have the same shape and dtype as
# the true labels.
Y_pred_classes = np.round(Y_pred).astype(int).ravel()
Y_true = Y_val  # True labels

# Compute the confusion matrix. labels=[0, 1] pins the matrix to 2x2 even when
# only one class shows up in the labels and predictions; otherwise it would
# collapse to 1x1 and plot_confusion_matrix's reshape(2, 2) would fail.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes, labels=[0, 1])

# Plot the confusion matrix
plot_confusion_matrix(confusion_mtx)

### Classification Report

In [None]:
# Text report of per-class metrics for the validation predictions
# (Y_true / Y_pred_classes come from the confusion-matrix cell).
print(classification_report(Y_true, Y_pred_classes))

### Testing Accuracy

In [None]:
# Display names indexed by class label: 0 -> 'Forged', 1 -> 'Authentic'
class_names = ['Forged', 'Authentic']

In [None]:
# Total number of held-out test images
total_test = len(X_test)

if total_test:
    # Predict the whole test set in one batched call instead of calling
    # model.predict once per image, which is far slower.
    test_probabilities = model.predict(np.asarray(X_test).reshape(-1, 128, 128, 3))
    # Round the sigmoid outputs to 0/1 and flatten (N, 1) -> (N,) to line up
    # with the label vector.
    test_pred_classes = np.round(test_probabilities).ravel()
    correct_test = int(np.sum(test_pred_classes == np.asarray(Y_test)))
    accuracy = correct_test / total_test * 100.0
else:
    # Empty test split: report zeros rather than dividing by zero.
    correct_test = 0
    accuracy = 0.0

# Print the accuracy summary
print(f'Total test images: {total_test}')
print(f'Correctly predicted images: {correct_test}')
print(f'Accuracy: {accuracy:.2f} %')

### Test an image

In [None]:
# Testing a single image
test_image_path = 'path/to/your/test/image.jpg'  # Set your test image path
test_image = prepare_image(test_image_path)
test_image = test_image.reshape(-1, 128, 128, 3)  # Ensure the shape matches your model's input

y_pred = model.predict(test_image)

# The sigmoid output is the probability of class 1 ('Authentic').
authentic_probability = float(y_pred[0][0])
# Threshold explicitly and cast to int: round() on a NumPy float32 does not
# return a Python int on every NumPy version, and a float cannot index
# class_names. A probability of exactly 0.5 maps to class 0, as before.
y_pred_class = int(authentic_probability > 0.5)

# Displaying the images
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Display original image
original_image = plt.imread(test_image_path)
ax[0].axis('off')
ax[0].imshow(original_image)
ax[0].set_title('Original Image')

# Display ELA applied image
ax[1].axis('off')
ax[1].imshow(convert_to_ela_image(test_image_path, 90))
ax[1].set_title('ELA Image')

# Prediction output: confidence is the probability of whichever class was chosen.
confidence = max(authentic_probability, 1.0 - authentic_probability)
print(f'Prediction: {class_names[y_pred_class]}')
print(f'Confidence: {confidence * 100:0.2f}%')
print('--------------------------------------------------------------------------------------------------------------')


### Test a dataset

In [None]:
test_folder_path = ''  # dataset path
authentic = forged = total = 0

# Classify every jpg/png file in the folder and tally the predicted classes.
for filename in tqdm(os.listdir(test_folder_path), desc="Processing Images : "):
    if not filename.endswith(('jpg', 'png')):
        continue  # ignore files with other extensions

    test_image_path = os.path.join(test_folder_path, filename)
    test_image = prepare_image(test_image_path).reshape(-1, 128, 128, 3)
    y_pred = model.predict(test_image)
    y_pred_class = np.round(y_pred[0][0])  # rounded sigmoid output: 0 or 1
    total += 1

    # Class 0 is forged; anything else is counted as authentic.
    if y_pred_class == 0:
        forged += 1
    else:
        authentic += 1

print(f'Total images: {total}\nAuthentic Images: {authentic}\nForged Images: {forged}')
