In [7]:
import os
os.chdir('../')

In [23]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataPreprocessingConfig:
    root_dir: Path
    train_dir: Path
    val_dir: Path
    train_tf_records_dir : Path
    test_tf_records_dir : Path
    train_tf_records_file_path : str
    test_tf_records_file_path : str
    params_batch_size : int
    params_class_names : list
    params_image_size : int
    params_shuffle : bool
    params_random_seed : int
    params_train_num_shards : int
    params_test_num_shards : int
    param_random_rotation_left_factor: float
    param_random_rotation_right_factor: float
    param_random_flip_mode: str
    param_random_contrast_factor: float

In [2]:
from Emotion_Detector.constants import PARAMS_FILE_PATH, CONFIG_FILE_PATH
from Emotion_Detector.utils import *
from tensorflow.keras.layers import RandomFlip, RandomRotation,RandomContrast

In [27]:
class ConfigurationManager:
    def __init__(
            self,
            config_path = CONFIG_FILE_PATH,
            params_path = PARAMS_FILE_PATH
    ):
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_path)
        create_directories([self.config.artifacts_root])

    def get_data_preprocessing_config(self) -> DataPreprocessingConfig:
        config = self.config.data_preprocessing


        create_directories([config.root_dir])
        create_directories([config.train_tf_records_dir])
        create_directories([config.test_tf_records_dir])

        data_preprocessing_config = DataPreprocessingConfig(
            root_dir = Path(config.root_dir),
            train_dir = Path(config.train_dir),
            val_dir = Path(config.val_dir),
            train_tf_records_dir = Path(config.train_tf_records_dir),
            test_tf_records_dir = Path(config.test_tf_records_dir),
            train_tf_records_file_path = config.train_tf_records_file_path,
            test_tf_records_file_path = config.test_tf_records_file_path,
            params_batch_size = self.params.BATCH_SIZE,
            params_class_names = self.params.CLASS_NAMES,
            params_image_size = self.params.IMAGE_SIZE,
            params_shuffle = self.params.SHUFFLE,
            params_random_seed = self.params.RANDOM_SEED,
            params_train_num_shards = self.params.TRAIN_NUM_SHARDS,
            params_test_num_shards = self.params.TEST_NUM_SHARDS,
            param_random_rotation_left_factor = self.params.RANDOM_ROTATION_LEFT_FACTOR,
            param_random_rotation_right_factor = self.params.RANDOM_ROTATION_RIGHT_FACTOR,
            param_random_flip_mode = self.params.RANDOM_FLIP_MODE,
            param_random_contrast_factor = self.params.RANDOM_CONTRAST_FACTOR
        )
        return data_preprocessing_config

In [43]:
class Data_Preprocessing:
    def __init__(self, config: DataPreprocessingConfig):
        self.config = config

    
    def _load_data_from_directory(self):
        train_data = tf.keras.utils.image_dataset_from_directory(
            self.config.train_dir,
            labels='inferred', ## consider file as class
            label_mode='categorical', ## factorized classes
            class_names=self.config.params_class_names, ## Defined class name as per subdirectories
            color_mode='rgb',
            batch_size=self.config.params_batch_size,
            image_size=(self.config.params_image_size, self.config.params_image_size),
            shuffle=self.config.params_shuffle,
            seed=self.config.params_random_seed
        )


        val_data = tf.keras.utils.image_dataset_from_directory(
            self.config.val_dir,
            labels='inferred', ## consider file as class
            label_mode='categorical', ## factorized classes
            class_names=self.config.params_class_names, ## Defined class name as per subdirectories
            color_mode='rgb',
            batch_size=self.config.params_batch_size,
            image_size=(self.config.params_image_size, self.config.params_image_size),
            shuffle=self.config.params_shuffle,
            seed=self.config.params_random_seed
        )

        return train_data, val_data
    
    
    def _augment_data(self, train_data):

        augment_layers = tf.keras.Sequential([
            RandomRotation(factor = (self.config.param_random_rotation_left_factor, self.config.param_random_rotation_right_factor)),
            RandomFlip(mode = self.config.param_random_flip_mode),
            RandomContrast(factor = self.config.param_random_contrast_factor)
        ])

        def augment_layer(image, label):
            return augment_layers(image, training = True), label
        
        train_data = (
            train_data.map(augment_layer, num_parallel_calls=tf.data.AUTOTUNE)#.prefetch(tf.data.AUTOTUNE)
        )

        return train_data
    
    def preprocess_and_create_tf_records(self):

        train_data, val_data = self._load_data_from_directory()
        train_data = self._augment_data(train_data)

        train_data = (train_data.unbatch())
        val_data = (val_data.unbatch())

        encoded_train_data = (train_data.map(image_to_byte_encoder))
        encoded_val_data = (val_data.map(image_to_byte_encoder))

        if (os.path.exists(self.config.train_tf_records_dir) and 
            len(os.listdir(self.config.train_tf_records_dir)) == self.config.params_train_num_shards):

            print('Train tf records already exists!')

        else:
            print('Creating tf records for train set...') 
            write_tf_records(NUM_SHARDS= self.config.params_train_num_shards,
                            encoded_data=encoded_train_data,
                            path_to_write=self.config.train_tf_records_file_path
                            )
            
        if (os.path.exists(self.config.test_tf_records_dir) and 
            len(os.listdir(self.config.test_tf_records_dir)) == self.config.params_test_num_shards):

            print('Test tf records already exists!') 
            
        else:
            print('Creating tf records for test set...') 
            write_tf_records(NUM_SHARDS= self.config.params_test_num_shards,
                            encoded_data=encoded_val_data,
                            path_to_write=self.config.test_tf_records_file_path
                            )
        
        return "TF Records Created Successfully for training and validation data"

In [44]:
try:
    config = ConfigurationManager()
    data_preprocessing_config = config.get_data_preprocessing_config()
    data_preprocessing = Data_Preprocessing(config = data_preprocessing_config)
    data_preprocessing.preprocess_and_create_tf_records()
except Exception as e:
    raise e

Found 28709 files belonging to 7 classes.
Found 7178 files belonging to 7 classes.
Train tf records already exists!
Creating tf records for test set...
