In [1]:
import numpy as np
import pandas as pd
import librosa
import io
import soundfile as sf
from moviepy.editor import VideoFileClip
import pickle as pk
import os
import tensorflow as tf
from tensorflow.keras.saving import register_keras_serializable
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [11]:
real_audio_dir = r"REAL"
fake_audio_dir = r"FAKE"

In [12]:
def extract_features(file_path):
    try:
        # Load the video file
        video_clip = VideoFileClip(file_path)
        audio = video_clip.audio
        fps = audio.fps
        audio_samples = np.array(list(audio.iter_frames(fps=fps, dtype="float32"))).flatten()
        buffer = io.BytesIO()
        sf.write(buffer, audio_samples, fps, format='wav')
        buffer.seek(0)
        x, sr = librosa.load(buffer, sr=None)
        mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40)

        return mfccs

    except Exception as e:
        print(f"Error encountered while parsing file: {file_path}, {e}")
        return None


def load_data(real_dir, fake_dir):
    labels = []
    features = []

    # Load real audios
    for file_name in os.listdir(real_dir):
        file_path = os.path.join(real_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is not None:
            features.append(mfccs)
            labels.append(0)  # 0 for REAL

    # Load fake audios
    for file_name in os.listdir(fake_dir):
        file_path = os.path.join(fake_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is not None:
            features.append(mfccs)
            labels.append(1)  # 1 for FAKE

    return np.array(features), np.array(labels)

In [14]:
X, y = load_data(real_audio_dir, fake_audio_dir)

In [16]:
with open("X.pkl", "wb") as f:
    pk.dump(X, f)
with open("y.pkl", "wb") as f:
    pk.dump(y, f)

In [None]:
with open("X.pkl", "rb") as f:
    X = pk.load(f)
with open("y.pkl", "rb") as f:
    y = pk.load(f)

In [17]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Reshape the data to fit the CNN input format
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [18]:
model = models.Sequential()

# CNN layers
model.add(
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu")
)
model.add(layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.25))

# RNN layers
model.add(layers.Reshape((64, -1)))
model.add(layers.Bidirectional(layers.GRU(128, return_sequences=False)))

# Fully connected layers
model.add(layers.Dense(128, activation="relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(2, activation="softmax"))

# Compile the model
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

In [5]:
@register_keras_serializable()
class AudioModel(tf.keras.Model):
    def __init__(self, input_shape):
        super(AudioModel, self).__init__()
        self.input_shape = input_shape  # Store the input shape
        # Define the model layers
        self.conv1 = layers.Conv2D(
            32, kernel_size=(3, 3), activation="relu", input_shape=input_shape
        )
        self.conv2 = layers.Conv2D(64, kernel_size=(3, 3), activation="relu")
        self.pool = layers.MaxPooling2D(pool_size=(2, 2))
        self.dropout1 = layers.Dropout(0.25)

        self.reshape = layers.Reshape((64, -1))
        self.gru = layers.Bidirectional(layers.GRU(128, return_sequences=False))

        self.dense1 = layers.Dense(128, activation="relu")
        self.dropout2 = layers.Dropout(0.5)
        self.dense2 = layers.Dense(2, activation="softmax")

    def call(self, inputs):
        # Forward pass through the layers
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.dropout1(x)

        x = self.reshape(x)
        x = self.gru(x)

        x = self.dense1(x)
        x = self.dropout2(x)
        return self.dense2(x)

    def get_config(self):
        config = super(AudioModel, self).get_config()
        config.update(
            {"input_shape": self.input_shape}  # Include input shape in config
        )
        return config

    @classmethod
    def from_config(cls, config):
        # Create a model instance from the config
        input_shape = config.pop("input_shape")  # Extract input_shape from config
        return cls(input_shape)  # Create an instance of the model


# Function to create and compile the model
def create_model(input_shape):
    model = AudioModel(input_shape)
    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model


# Example usage
input_shape = (
    64,
    40,
    1,
)  # Adjust based on your data (e.g., (n_mfccs, time_steps, channels))

In [8]:
model = create_model(input_shape)
model.summary()

In [19]:
checkpoint = ModelCheckpoint(r"models/dl_model.keras", monitor="val_loss", save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor="val_loss", patience=5, verbose=1)

history = model.fit(
    X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test), callbacks=[checkpoint, early_stopping]
)

Epoch 1/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8044 - loss: 0.6203
Epoch 1: val_loss improved from inf to 0.60748, saving model to models/dl_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2s/step - accuracy: 0.8033 - loss: 0.6197 - val_accuracy: 0.7273 - val_loss: 0.6075
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8246 - loss: 0.5199
Epoch 2: val_loss did not improve from 0.60748
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 2s/step - accuracy: 0.8244 - loss: 0.5193 - val_accuracy: 0.7273 - val_loss: 0.6163
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8248 - loss: 0.5241
Epoch 3: val_loss improved from 0.60748 to 0.60204, saving model to models/dl_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 2s/step - accuracy: 0.8247 - loss: 0.5232 - val_accuracy: 0.727

In [52]:
model.save(r"models/dl_model.keras", overwrite=True)
print("Model saved successfully.")

Model saved successfully.


In [20]:
# Ensure to import keras properly
import tensorflow as tf
from tensorflow import keras


# Function to load the model
def load_model(model_path):
    try:
        # Load the model from the specified path
        model = keras.models.load_model(model_path)
        print("Model loaded successfully.")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

In [21]:
model_path = r"models/dl_model.keras"

# Load the model
loaded_model = load_model(model_path)

Error loading model: A total of 1 objects could not be loaded. Example error message for object <GRUCell name=gru_cell, built=True>:

Layer 'gru_cell' expected 3 variables, but received 0 variables during loading. Expected: ['kernel', 'recurrent_kernel', 'bias']

List of objects that could not be loaded:
[<GRUCell name=gru_cell, built=True>]
