In [1]:
import os

In [2]:
# Show the kernel's working directory before it is changed by the chdir cell below.
%pwd

'd:\\Projects\\DeepFake Voice Recognition\\audio-deepfake-detection\\research'

In [3]:
# Move from the notebook folder ("research") up to the project root so that
# relative paths such as config/config.yaml and artifacts/ resolve.
# The guard makes the cell idempotent: re-running it (or starting the kernel
# from the project root) no longer climbs one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the chdir cell above (expected: the project root).
%pwd

'd:\\Projects\\DeepFake Voice Recognition\\audio-deepfake-detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable settings for the model-training stage.

    Instances are built by ``ConfigManager.read_model_train_config`` from the
    YAML config and params files and consumed by ``ModelTraining``.
    """
    root_dir: Path  # stage directory; created by ConfigManager before training
    trained_model_dir: Path  # path handed to model.save() once training finishes
    data_dir: Path  # directory holding X_train.npy, y_train.npy, X_test.npy, y_test.npy
    updated_model_dir: Path  # path handed to load_model() for the model to be trained
    metrics: list  # metric names (e.g. 'accuracy', 'auc') resolved by ModelTraining._parse_metrics
    epochs: int  # forwarded to model.fit(epochs=...)
    batch_size: int  # forwarded to model.fit(batch_size=...)
    learning_rate: float  # learning rate of the Adam optimizer used at compile time
    
@dataclass(frozen=True)
class CallbackPreparationConfig:
    """Immutable settings for building the Keras training callbacks.

    Instances are built by ``ConfigManager.read_callback_prep_config`` and
    consumed by ``CallbackPreparation``.
    """
    root_dir: Path  # taken from the callback_preparation section of the config file
    tensorboard_log_dir: Path  # parent directory under which each run's "run-<timestamp>" log dir is placed
    model_checkpoint_dir: Path  # directory joined with the checkpoint filename template
    early_stopping_monitor: str  # quantity passed to EarlyStopping(monitor=...)
    early_stopping_patience: int  # value passed to EarlyStopping(patience=...)

In [6]:
from audioClassifier.constants import *
from audioClassifier.utils.common import open_yaml_file, create_directories

In [7]:
class ConfigManager:
    """Reads the YAML config/params files and builds per-stage config objects."""

    def __init__(self, config_file = CONFIG_PATH, params_file = PARAMS_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_file: Path of the YAML file with directory/path settings.
            params_file: Path of the YAML file with training hyperparameters.
        """
        self.config = open_yaml_file(config_file)
        self.params = open_yaml_file(params_file)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def read_callback_prep_config(self) -> CallbackPreparationConfig:
        """Create the callback directories and return the callback settings.

        Returns:
            CallbackPreparationConfig: paths from the ``callback_preparation``
            config section plus the early-stopping values from the params file.
        """
        section = self.config.callback_preparation

        # NOTE(review): dirname() yields the *parent* of model_checkpoint_dir, so the
        # directory created here depends on how that value is written in the YAML
        # (trailing slash / file name) -- confirm this is the intended folder.
        required_dirs = [
            os.path.dirname(section.model_checkpoint_dir),
            section.tensorboard_log_dir,
        ]
        create_directories(required_dirs)

        return CallbackPreparationConfig(
            root_dir = Path(section.root_dir),
            tensorboard_log_dir = Path(section.tensorboard_log_dir),
            model_checkpoint_dir = Path(section.model_checkpoint_dir),
            early_stopping_monitor = self.params.early_stopping_monitor,
            early_stopping_patience = self.params.early_stopping_patience,
        )

    def read_model_train_config(self) -> ModelTrainingConfig:
        """Create the training stage directory and return the training settings.

        Returns:
            ModelTrainingConfig: paths gathered from the ``model_training``,
            ``model_preparation`` and ``data_validation`` config sections plus
            the hyperparameters from the params file.
        """
        training_section = self.config.model_training
        preparation_section = self.config.model_preparation
        validation_section = self.config.data_validation

        create_directories([training_section.root_dir])

        hyperparams = self.params
        return ModelTrainingConfig(
            root_dir = Path(training_section.root_dir),
            trained_model_dir = Path(training_section.trained_model_dir),
            data_dir = Path(validation_section.data_path),
            updated_model_dir = Path(preparation_section.updated_model_path),
            metrics = hyperparams.metrics,
            batch_size = hyperparams.batch_size,
            epochs = hyperparams.epochs,
            learning_rate = hyperparams.learning_rate,
        )

In [8]:
import time
import numpy as np
from tensorflow.keras import callbacks
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall, TruePositives, TrueNegatives, FalsePositives, FalseNegatives
from audioClassifier import logger




In [9]:
class CallbackPreparation:
    """Builds the list of Keras callbacks used while fitting the model."""

    def __init__(self, config: CallbackPreparationConfig):
        """Keep the callback settings for later use."""
        self.config = config

    def _get_timestamped_tb_directory(self):
        """Return a per-run TensorBoard directory named ``run-<timestamp>``.

        The directory lives under ``config.tensorboard_log_dir``; the timestamp
        is the local time formatted as ``YYYY-MM-DD--HHMMSS``.
        """
        run_stamp = time.strftime("%Y-%m-%d--%H%M%S")
        run_folder = f"run-{run_stamp}"
        return self.config.tensorboard_log_dir / run_folder

    def prepare_callbacks(self):
        """Create the TensorBoard, checkpoint and early-stopping callbacks.

        Returns:
            list: ``[TensorBoard, ModelCheckpoint, EarlyStopping]`` in that order.

        Raises:
            Exception: whatever a callback constructor raises; it is logged first.
        """
        run_log_dir = self._get_timestamped_tb_directory()

        try:
            tb_cb = callbacks.TensorBoard(log_dir = str(run_log_dir))

            # The file name embeds the epoch number and validation loss.
            # NOTE(review): no monitor is passed here, so checkpointing follows
            # ModelCheckpoint's own default rather than early_stopping_monitor.
            checkpoint_path = self.config.model_checkpoint_dir / "model-{epoch:02d}-{val_loss:.2f}.keras"
            ckpt_cb = callbacks.ModelCheckpoint(
                filepath = str(checkpoint_path),
                save_best_only = True,
            )

            stop_cb = callbacks.EarlyStopping(
                monitor = self.config.early_stopping_monitor,
                patience = self.config.early_stopping_patience,
                restore_best_weights = True,
            )
        except Exception as e:
            logger.error(f"Error creating callbacks: {e}")
            raise e

        return [tb_cb, ckpt_cb, stop_cb]

In [10]:
class ModelTraining:
    """Loads the prepared model and data, compiles the model, trains and saves it."""

    def __init__(self, config: ModelTrainingConfig):
        """Keep the training settings for later use."""
        self.config = config

    def _load_data(self):
        """Load the four NumPy arrays stored in ``config.data_dir``.

        Returns:
            tuple: ``(X_train, y_train, X_test, y_test)``.

        Raises:
            Exception: whatever ``np.load`` raises (e.g. a missing file); it is
            logged before being re-raised.
        """
        data_dir = self.config.data_dir
        try:
            X_train = np.load(data_dir / 'X_train.npy')
            y_train = np.load(data_dir / 'y_train.npy')
            X_test = np.load(data_dir / 'X_test.npy')
            y_test = np.load(data_dir / 'y_test.npy')
            return X_train, y_train, X_test, y_test
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def _parse_metrics(self, metrics_list):
        """Turn configured metric names into objects accepted by ``model.compile``.

        Args:
            metrics_list: Iterable of metric names. ``None`` (e.g. an empty YAML
                entry) is treated as "no metrics".

        Returns:
            list: ``'accuracy'`` is kept as a string; every other supported name
            becomes a fresh Keras metric instance carrying that name.

        Raises:
            ValueError: if a name is not one of the supported metrics.
        """
        if metrics_list is None:
            return []

        # Factories are lazy so a metric object is only created when requested.
        factories = {
            'accuracy': lambda: 'accuracy',
            'auc': lambda: AUC(name = 'auc'),
            'recall': lambda: Recall(name = 'recall'),
            'precision': lambda: Precision(name = 'precision'),
            'true_positives': lambda: TruePositives(name = 'true_positives'),
            'true_negatives': lambda: TrueNegatives(name = 'true_negatives'),
            'false_positives': lambda: FalsePositives(name = 'false_positives'),
            'false_negatives': lambda: FalseNegatives(name = 'false_negatives'),
        }

        parsed_metrics = []
        for metric in metrics_list:
            # Only look up strings so unhashable entries still end in ValueError.
            factory = factories.get(metric) if isinstance(metric, str) else None
            if factory is None:
                raise ValueError(f"Unsupported metric: {metric}")
            parsed_metrics.append(factory())
        return parsed_metrics

    def _load_model(self):
        """Load the model from ``config.updated_model_dir`` and compile it.

        The model is loaded with ``compile = False`` and then compiled with an
        Adam optimizer (``config.learning_rate``), binary cross-entropy loss and
        the metrics named in ``config.metrics``.

        Returns:
            The compiled Keras model.

        Raises:
            Exception: any loading/compilation error; it is logged first.
        """
        try:
            model = load_model(str(self.config.updated_model_dir), compile = False)
            metrics_list = self._parse_metrics(self.config.metrics)
            model.compile(optimizer = Adam(learning_rate = self.config.learning_rate),
                          loss = 'binary_crossentropy',
                          metrics = metrics_list)
            return model
        except Exception as e:
            logger.error(f"Error loading and compiling model: {e}")
            raise

    def train_model(self, callback: list):
        """Fit the model and save it to ``config.trained_model_dir``.

        Args:
            callback: List of Keras callbacks forwarded to ``model.fit``.

        Raises:
            Exception: any error raised while fitting or saving; it is logged first.
        """
        X_train, y_train, X_test, y_test = self._load_data()
        model = self._load_model()

        try:
            # Keras documents validation_data as a tuple (x_val, y_val), not a list.
            # NOTE(review): the test split doubles as validation data here, so it
            # also drives the callbacks -- confirm that is intended.
            model.fit(
                X_train,
                y_train,
                epochs = self.config.epochs,
                batch_size = self.config.batch_size,
                validation_data = (X_test, y_test),
                callbacks = callback,
            )
            model.save(str(self.config.trained_model_dir))
            logger.info(f"Model trained successfully and saved to {self.config.trained_model_dir}.")
        except Exception as e:
            logger.error(f"Error training the model: {e}")
            raise

In [11]:
# Run the stage end to end: read the configuration, build the callbacks,
# then train and save the model. Exceptions are left to propagate to the
# notebook unchanged; a try/except that only re-raises adds nothing.
config = ConfigManager()

callback_preparation_config = config.read_callback_prep_config()
callback_preparation = CallbackPreparation(config = callback_preparation_config)
prepared_callbacks = callback_preparation.prepare_callbacks()

model_training_config = config.read_model_train_config()
model_training = ModelTraining(config = model_training_config)
model_training.train_model(callback = prepared_callbacks)

2024-07-24 18:35:34,145 - common.py - INFO - YAML file 'config\config.yaml' was loaded successfully.
2024-07-24 18:35:34,146 - common.py - INFO - YAML file 'params.yaml' was loaded successfully.
2024-07-24 18:35:34,147 - common.py - INFO - Directory 'artifacts' created successfully or already exists.
2024-07-24 18:35:34,148 - common.py - INFO - Directory 'artifacts/callback_preparation/checkpoint_dir' created successfully or already exists.
2024-07-24 18:35:34,149 - common.py - INFO - Directory 'artifacts/callback_preparation/tensorboard_log_dir' created successfully or already exists.
2024-07-24 18:35:34,150 - common.py - INFO - Directory 'artifacts/model_training' created successfully or already exists.




Epoch 1/100






Epoch 2/100
 17/131 [==>...........................] - ETA: 25s - loss: 0.2119 - accuracy: 0.9651 - auc: 0.9952 - recall: 0.9520

KeyboardInterrupt: 