In [283]:
import tensorflow as tf
from keras.preprocessing.image import load_img, img_to_array
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Input, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import kagglehub

In [272]:
# Download the RVF10K dataset

kaggle_path = kagglehub.dataset_download("sachchitkunichetty/rvf10k")
print(kaggle_path)

C:\Users\Ziyad\.cache\kagglehub\datasets\sachchitkunichetty\rvf10k\versions\2


In [185]:
# Collect image paths and encode labels

def image_preprocessing(path):
    images_paths = []
    images_labels = []
    
    for root, dirs, files in os.walk(path):
        for filename in files:
            if filename.endswith('.jpg'):
                images_paths.append(os.path.join(root, filename))
                images_labels.append(0 if 'real' in root.lower() else 1) # {real} : 0 , {fake} : 1
    
    return np.array(images_paths), np.array(images_labels)

In [186]:
# Prepare image paths and labels for training and validation

image_train, image_train_label = image_preprocessing(kaggle_path + r'\rvf10k\train')
image_valid, image_valid_label = image_preprocessing(kaggle_path + r'\rvf10k\valid')

print(image_train.shape)
print(image_valid.shape)

(7000,)
(3000,)


In [187]:
# Convert image paths to image arrays

def image_processing(img_paths, img_labels, target_size=(128, 128)):
    
    images = []
    labels = []
    
    for i in range(len(img_paths)):
        
        # Load image, resize and convert to array
        img = load_img(img_paths[i], target_size=target_size)
        img_array = img_to_array(img)
        
        # Normalize the array
        img_array = img_array / 255.0
        
        # Append to images[] and corresponding label to labels
        images.append(img_array)
        labels.append(img_labels[i])
        
    return np.array(images), np.array(labels)

In [188]:
encoded_train, encoded_train_label = image_processing(image_train, image_train_label)

In [189]:
encoded_valid, encoded_valid_label = image_processing(image_valid, image_valid_label)

In [190]:
encoded_valid.shape # 3000 arrays, each 128 rows and 128 columns, and each element has 3 nums (RGB) (7000,128,128,3)

(3000, 128, 128, 3)

In [229]:
# Define Convolutional Neural Network model
def CNN_Model(input_shape=(128, 128, 3)):
    
    model = tf.keras.Sequential([
        
        # Input Layer
        Input(shape=input_shape),
        
        # Convolutional Layers
        Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2,2)),  
        
        Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2,2)),
        Dropout(0.2),
        
        # Dense Layers
        Flatten(),
        Dense(units=128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(units=1, activation='sigmoid')
        
    ])
    
    return model

In [230]:
# Build the model
model = CNN_Model()

In [231]:
# Show the model summary
CNN_Model().summary()

In [235]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Regularization
early_stop = EarlyStopping(monitor='val_loss', patience=5)

In [279]:
# Train the model
model.fit(encoded_train,
          encoded_train_label,
          epochs=50,
          validation_data=(encoded_valid, encoded_valid_label),
          callbacks=early_stop) 




In [281]:
# Save the model
model.save('model_final.keras')

In [282]:
# Final evaluation on validation set

model.evaluate(encoded_valid, encoded_valid_label, batch_size=32, callbacks=[early_stop])

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 159ms/step - accuracy: 0.8560 - loss: 0.4054


[0.3367975354194641, 0.8786666393280029]

In [278]:
# Print classification report to see performance
preds = (model.predict(encoded_valid) >= 0.5).astype(int)
print(classification_report(encoded_valid_label, preds))

# Print the confusion matrix
cm = confusion_matrix(encoded_valid_label, preds)
print(cm)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 154ms/step
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1500
           1       0.91      0.84      0.87      1500

    accuracy                           0.88      3000
   macro avg       0.88      0.88      0.88      3000
weighted avg       0.88      0.88      0.88      3000

[[1369  131]
 [ 233 1267]]
