## Model Training

In [1]:
import os

In [2]:
%pwd

'/home/utpal108/dev/Upwork/Projects/Diabetic-Retinopathy-Prediction/notebooks/diabetic_retinopathy'

In [3]:
os.chdir('../../')

In [4]:
%pwd

'/home/utpal108/dev/Upwork/Projects/Diabetic-Retinopathy-Prediction'

In [5]:
# Config Entity
from dataclasses import dataclass
from pathlib import Path
import pandas as pd

@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_path: Path
    training_images_path: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list

In [6]:
from diabeticRetinopathy.constants import *
from diabeticRetinopathy.utils import create_directories, read_yaml

In [7]:
# Configuration Manager
class ConfigurationManager:
    def __init__(self, config_filepath= CONFIG_FILE_PATH, params_filepath= PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])
    
    def get_training_config(self) -> TrainingConfig:
        base_model_config = self.config.prepare_base_model
        training_config = self.config.training
        data_ingestion_config = self.config.data_ingestion
        params = self.params

        create_directories([Path(training_config.root_dir)])
        
        training_config = TrainingConfig(
            root_dir = Path(training_config.root_dir),
            trained_model_path = Path(training_config.trained_model_path),
            updated_base_model_path = Path(base_model_config.updated_base_model_path),
            training_path = Path(training_config.training_file_path),
            training_images_path = Path(training_config.training_images_path),
            params_epochs =  params.EPOCHS,
            params_batch_size = params.BATCH_SIZE,
            params_is_augmentation = params.AUGMENTATION,
            params_image_size = params.IMAGE_SIZE
        )

        return training_config

In [1]:
import tensorflow as tf

2024-04-16 13:18:43.254000: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-16 13:18:43.257145: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-16 13:18:43.302703: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [31]:
class Traing:
    def __init__(self, config:TrainingConfig):
        self.config = config
    
    def get_base_model(self):
        self.model = tf.keras.models.load_model(self.config.updated_base_model_path)

    def train_valid_generator(self):

        train_df = pd.read_csv(self.config.training_path)
        train_df['id_code'] = train_df['id_code'].apply(lambda x: x+'.png')
        train_df['diagnosis'] = train_df['diagnosis'].astype('str')

        train_datagen=tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255, 
            validation_split=0.2,
            horizontal_flip=True)
        

        self.train_generator=train_datagen.flow_from_dataframe(
            dataframe=train_df,
            directory=self.config.training_images_path,
            x_col="id_code",
            y_col="diagnosis",
            batch_size=self.config.params_batch_size,
            class_mode="categorical",
            target_size=self.config.params_image_size[:-1],
            subset='training')
        

        self.valid_generator=train_datagen.flow_from_dataframe(
            dataframe=train_df,
            directory=self.config.training_images_path,
            x_col="id_code",
            y_col="diagnosis",
            batch_size=self.config.params_batch_size,
            class_mode="categorical", 
            target_size=self.config.params_image_size[:-1],
            subset='validation')

    def save_model(self, path: Path, model: tf.keras.Model):
        model.save(path)
        
    def train(self, callback_list: list):
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        self.model.fit(
            self.train_generator,
            epochs = self.config.params_epochs,
            steps_per_epoch = self.steps_per_epoch,
            validation_steps = self.validation_steps,
            validation_data = self.valid_generator,
            callbacks = callback_list
        )

        self.save_model(
            path = self.config.trained_model_path,
            model = self.model
        )

In [29]:
from diabeticRetinopathy.components.diabetic_retinopathy.prepare_callback import PrepareCallback
from diabeticRetinopathy.config import ConfigurationManager as CallbackConfigManager

In [32]:
try:
    config = ConfigurationManager()

    # Prepare Callbacks
    callback_config_manager = CallbackConfigManager()
    prepare_callbacks_config = callback_config_manager.get_prepare_callbacks_config()
    prepare_callback = PrepareCallback(config=prepare_callbacks_config)
    callback_list = prepare_callback.get_tb_ckpt_callback()

    training_config = config.get_training_config()
    training = Traing(config=training_config)
    training.get_base_model()
    training.train_valid_generator()
    training.train(
        callback_list = callback_list
    )

except Exception as e:
    raise e

2024-04-16 13:16:28,739 : diabeticRetinopathy.logger - INFO - YAML file: config/config.yaml loaded successfully
2024-04-16 13:16:28,741 : diabeticRetinopathy.logger - INFO - YAML file: params.yaml loaded successfully
2024-04-16 13:16:28,742 : diabeticRetinopathy.logger - INFO - created directory at: artifacts
2024-04-16 13:16:28,745 : diabeticRetinopathy.logger - INFO - YAML file: config/config.yaml loaded successfully
2024-04-16 13:16:28,746 : diabeticRetinopathy.logger - INFO - YAML file: params.yaml loaded successfully
2024-04-16 13:16:28,747 : diabeticRetinopathy.logger - INFO - created directory at: artifacts
2024-04-16 13:16:28,748 : diabeticRetinopathy.logger - INFO - created directory at: artifacts/prepare_callbacks/tensorboard_log_dir
2024-04-16 13:16:28,748 : diabeticRetinopathy.logger - INFO - created directory at: artifacts/prepare_callbacks/checkpoint_dir
2024-04-16 13:16:28,749 : diabeticRetinopathy.logger - INFO - created directory at: artifacts/training


Found 2930 validated image filenames belonging to 5 classes.
Found 732 validated image filenames belonging to 5 classes.


  trackable.load_own_variables(weights_store.get(inner_path))


Epoch 1/5


  self._warn_if_super_not_called()
2024-04-16 13:16:42.396575: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 50331648 exceeds 10% of free system memory.
2024-04-16 13:16:43.458307: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 50331648 exceeds 10% of free system memory.
2024-04-16 13:16:44.604465: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 50331648 exceeds 10% of free system memory.
2024-04-16 13:16:45.594524: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 50331648 exceeds 10% of free system memory.
2024-04-16 13:16:46.720334: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 50331648 exceeds 10% of free system memory.


[1m  7/183[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:48[0m 5s/step - accuracy: 0.2554 - loss: 12.2782