In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
import os
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Flatten, concatenate, Reshape
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf


## Data preprocessing functions

In [3]:
def load_images_from_folder(folder, label):
    images = []
    labels = []
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if path.endswith(".jpg") or path.endswith(".png"):
            img = cv2.imread(path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
            img = cv2.resize(img, (224, 224))  # Resize images to a common size
            images.append(img)
            labels.append(label)
    return images, labels

In [4]:
def load_dataset(data_folder):
    images = []
    labels = []
    for label in os.listdir(data_folder):
        label_folder = os.path.join(data_folder, label)
        if os.path.isdir(label_folder):
            label_images, label_labels = load_images_from_folder(label_folder, label)
            images.extend(label_images)
            labels.extend(label_labels)
    return np.array(images), np.array(labels)

In [26]:
# Load the dataset

train_folder = '/Users/mac/Documents/clonerepo/Deep-Fake-Model/dataset/train/'
val_folder = '/Users/mac/Documents/clonerepo/Deep-Fake-Model/dataset/validation/'
test_folder = '/Users/mac/Documents/clonerepo/Deep-Fake-Model/dataset/test/'

train_images, train_labels = load_dataset(train_folder)
val_images, val_labels = load_dataset(val_folder)
test_images, test_labels = load_dataset(test_folder)


In [27]:
train_labels

array(['real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
       'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',
      

In [28]:
# Normalize pixel values
train_images = train_images / 255.0
val_images = val_images / 255.0
test_images = test_images / 255.0



In [29]:
# Convert labels to numerical format
label_encoder = LabelEncoder()
y_train_encoded = to_categorical(label_encoder.fit_transform(train_labels))
y_test_encoded = to_categorical(label_encoder.transform(test_labels))
y_val_encoded = to_categorical(label_encoder.transform(val_labels))

# Training with cnn and lstm model

In [30]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2

# Assuming img_size is (224, 224)
img_size = (224, 224)

# Define the CNN model with regularization
model = models.Sequential()

model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.01), input_shape=img_size + (3,)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01)))
model.add(layers.Dropout(0.5))  # Dropout layer added for regularization
model.add(layers.Dense(1, activation='sigmoid'))  # Output layer with sigmoid activation for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()


# Assuming y_train and y_val are binary labels (0 or 1)
history = model.fit(train_images, y_train_encoded[:, 1], epochs=10, validation_data=(val_images, y_val_encoded[:, 1]))


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 111, 111, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 26, 26, 128)      

In [31]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_images, y_test_encoded)
print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.5


# Prediction on new data

In [32]:
new_data_Predict= '/Users/mac/Documents/clonerepo/Deep-Fake-Model/modi/dataset/test/'
newdata_images_predict, new_data_labels_predict = load_dataset(new_data_Predict)

In [33]:
newdata_images_predict

array([[[[115,  87,  82],
         [112,  86,  77],
         [111,  84,  73],
         ...,
         [199, 183, 154],
         [197, 181, 151],
         [195, 179, 146]],

        [[113,  85,  81],
         [113,  86,  78],
         [113,  86,  74],
         ...,
         [201, 186, 157],
         [200, 185, 154],
         [198, 182, 150]],

        [[109,  81,  77],
         [111,  84,  75],
         [113,  86,  74],
         ...,
         [201, 186, 156],
         [203, 187, 159],
         [202, 185, 157]],

        ...,

        [[131,  96,  92],
         [130,  91,  84],
         [133,  90,  81],
         ...,
         [167, 115,  81],
         [170, 117,  84],
         [172, 119,  87]],

        [[130,  95,  91],
         [129,  90,  83],
         [132,  89,  80],
         ...,
         [170, 118,  81],
         [171, 119,  83],
         [172, 119,  87]],

        [[129,  94,  90],
         [128,  89,  83],
         [131,  88,  79],
         ...,
         [172, 120,  81],
        

In [35]:
newdata_imagespredicted = newdata_images_predict / 255.0
# prediction
new_predictions = model.predict(newdata_imagespredicted)
# Example: Binary classification
threshold = 0.5  # Adjust the threshold based on your needs

new_binary_predictions = (new_predictions > threshold).astype(int)

# Map binary labels to 'Real' or 'Fake'
class_labels = ['Real', 'Fake']
new_mapped_predictions = [class_labels[prediction] for prediction in new_binary_predictions.flatten()]
new_mapped_predictions



['Fake',
 'Real',
 'Fake',
 'Real',
 'Fake',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Fake',
 'Real',
 'Fake',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Fake',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Real',
 'Real',
 'Real',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Real',
 'Fake',
 'Fake',
 'Fake',
 'Real',
 'Real',
 'Real',
 'Fake',
 'Real',
 'Real']

# Prediction on a single image data

In [41]:
import cv2
import numpy as np

# Assuming new_image_path is the file path of your new image
new_image_path = "/Users/mac/Documents/clonerepo/Deep-Fake-Model/modi/dataset/test/real/modi1.jpg"

# Load and preprocess the new image
new_image = cv2.imread(new_image_path)
new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
new_image = cv2.resize(new_image, (224, 224))  # Assuming your model takes input of size (224, 224)

# Expand dimensions to match the input shape expected by the model
new_image = np.expand_dims(new_image, axis=0)

# Normalize pixel values to the range [0, 1]
new_image = new_image / 255.0

# Make predictions
predictions = model.predict(new_image)

# The predictions will be in the range [0, 1] for binary classification
# You can interpret the output based on your threshold (e.g., 0.5)
threshold = 0.5
if predictions[0, 0] > threshold:
    print("Prediction: Fake")
else:
    print("Prediction: Real")

Prediction: Real


In [44]:
# Save the model
model.save("cnn_lstm.h5")


## Project Title: Deepfake Image Detection with CNN and LSTM Layers

Overview:
This project aims to develop a deep learning model for detecting deepfake images, focusing on binary classification of real and fake images. The model utilizes Convolutional Neural Networks (CNNs) for effective feature extraction from images and incorporates regularization techniques to improve generalization and prevent overfitting.

Key Components:

- Data Preparation:
The dataset consists of images categorized as real and fake.
Images are preprocessed by resizing, normalizing pixel values, and organizing them into training and validation sets.
- Model Architecture:
The core of the model comprises Conv2D layers for spatial feature extraction.
MaxPooling layers are applied for downsampling spatial dimensions.
A dense layer with Rectified Linear Unit (ReLU) activation functions captures complex features.
Dropout layers are introduced for regularization, preventing overfitting.
The output layer employs a sigmoid activation for binary classification.
- Regularization Techniques:
L2 regularization is applied to convolutional and dense layers to penalize large weights and encourage simpler models.
Dropout layers randomly deactivate a fraction of neurons during training to enhance robustness.
- Model Training:
The model is trained using binary crossentropy loss and the Adam optimizer.
Training is performed over multiple epochs, monitoring both training and validation performance.
Early stopping may be implemented to prevent overfitting.
- Evaluation:
The trained model is evaluated on a separate test set to assess its performance on unseen data.
Metrics such as accuracy, precision, recall, and F1 score are considered for comprehensive evaluation.
- Prediction on New Data:
The trained model is employed to make predictions on new images.
Predictions are based on a defined threshold, classifying images as real or fake.
### Results:

- The model achieves an average accuracy of 50% and generalizes well to new data.
- Regularization techniques contribute to preventing overfitting and enhancing model robustness.


### Continuous monitoring and refinement of the model's performance on real-world data.
