In [1]:
import os 

# Inspect the current working directory before changing it.
os.getcwd()
# Step one level up from the current working directory so that relative paths
# used by later cells resolve from the parent directory.
# NOTE: this is relative to wherever the kernel currently is, so re-running the
# cell moves up one more level each time.
os.chdir('../')
# Last expression of the cell: displays the new working directory.
os.getcwd()

'/home/gfspet/ml-projects/kidney-disease'

In [2]:
import sys
sys.path.append('/home/gfspet/ml-projects/kidney-disease/src')
from dataclasses import dataclass
from pathlib import Path 
from cnn_classifier.constants import * 
from cnn_classifier.utils.common import read_yaml, create_directories


@dataclass(frozen=True)
class TrainingConfig: 
    """Immutable bundle of paths and hyperparameters for the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` and
    consumed by ``Training``. The dataclass is frozen, so fields cannot be
    reassigned after construction.
    """

    # Directory for this stage's outputs; created by get_training_config.
    root_dir: Path
    # Destination passed to ``model.save`` once fitting has finished.
    trained_model_path: Path
    # Saved model that ``Training.get_base_model`` loads and recompiles.
    used_model_path: Path
    # Directory handed to ``flow_from_directory`` for both data subsets.
    training_data: Path
    # Number of epochs passed to ``model.fit``.
    params_epoch: int
    # Batch size used by both the training and validation generators.
    params_batch_size: int 
    # When truthy, the training generator applies flip/zoom augmentation.
    params_augmentation: bool
    # All entries except the last are used as the generators' target size
    # (presumably [height, width, channels] -- TODO confirm against params.yaml).
    params_image_size: list
    # Learning rate given to the Adam optimizer.
    params_learning_rate: float
    
class ConfigurationManager:
    """Read the project's YAML settings and turn them into stage configs.

    Both YAML files are parsed once, at construction time, and the artifacts
    root named in the config file is created straight away.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Parse the config and params files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyperparameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the settings needed by the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A ``TrainingConfig`` combining paths from the config file with
            hyperparameters from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.pretrained_base_model
        hyperparams = self.params
        # The images live in a 'data' folder inside the ingestion unzip directory.
        dataset_dir = os.path.join(self.config.data_ingestion.unzip_dir, 'data')

        # Make sure the stage's output directory exists before it is used.
        create_directories([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            used_model_path=Path(base_model_section.used_model_path),
            training_data=Path(dataset_dir),
            params_epoch=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_learning_rate=hyperparams.LEARNING_RATE,
        )

In [None]:
import tensorflow as tf


class Training:
    """Fit a previously saved Keras model on an image directory and save it.

    Expected call order: ``get_base_model()``, ``train_valid_generator()``,
    then ``train()``; the later steps read attributes set by the earlier ones.
    """

    def __init__(self, config: TrainingConfig):
        """Keep a reference to the training settings; nothing is loaded yet."""
        self.config = config

    def get_base_model(self):
        """Load the model at ``config.used_model_path`` and compile it.

        The model is compiled with Adam (learning rate taken from the config),
        categorical cross-entropy loss and an accuracy metric, and stored on
        ``self.model``.
        """
        settings = self.config
        self.model = tf.keras.models.load_model(settings.used_model_path)

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=settings.params_learning_rate
            ),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy'],
        )

    def train_valid_generator(self):
        """Create the validation and training image generators.

        Both generators read from ``config.training_data``, rescale pixel
        values by 1/255 and share a 20% validation split. Results are stored
        on ``self.validation_generator`` and ``self.train_generator``.
        """
        settings = self.config
        make_datagen = tf.keras.preprocessing.image.ImageDataGenerator

        # Options common to every ImageDataGenerator built here.
        shared_datagen_options = {
            'rescale': 1./255,
            'validation_split': 0.2,
        }

        # Options common to both flow_from_directory calls.
        shared_flow_options = {
            # Drop the last entry of the configured image size.
            'target_size': settings.params_image_size[:-1],
            'batch_size': settings.params_batch_size,
            # 'interpolation': 'nearest'
        }

        # Validation data is never augmented and keeps a fixed order.
        valid_datagen = make_datagen(**shared_datagen_options)
        self.validation_generator = valid_datagen.flow_from_directory(
            directory=settings.training_data,
            shuffle=False,
            subset='validation',
            class_mode='categorical',
            **shared_flow_options
        )

        # Default to the plain generator; switch to an augmenting one on request.
        train_datagen = valid_datagen
        if settings.params_augmentation:
            # Only flip and zoom are active; rotation_range=30,
            # width_shift_range=0.2, height_shift_range=0.2 and shear_range=0.2
            # were left commented out.
            train_datagen = make_datagen(
                horizontal_flip=True,
                zoom_range=0.2,
                **shared_datagen_options
            )

        self.train_generator = train_datagen.flow_from_directory(
            directory=settings.training_data,
            shuffle=True,
            subset='training',
            class_mode='categorical',
            **shared_flow_options
        )

    def train(self, callback_list: list):
        """Fit the model and save it to ``config.trained_model_path``.

        Args:
            callback_list: Keras callbacks forwarded to ``model.fit``.
        """
        settings = self.config

        self.model.fit(
            self.train_generator,
            epochs=settings.params_epoch,
            validation_data=self.validation_generator,
            verbose=1,
            callbacks=callback_list,
        )

        # Persist the fitted model once training has completed.
        self.save_model(path=settings.trained_model_path, model=self.model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Write ``model`` to ``path`` using the model's own ``save`` method."""
        model.save(path)

2024-11-24 12:41:53.828003: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-24 12:41:53.842124: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-24 12:41:53.890993: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732423313.967986  415908 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732423313.984480  415908 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-24 12:41:54.074618: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [4]:
class myCallback(tf.keras.callbacks.Callback):
    """Placeholder callback that overrides none of the base-class hooks."""

# NOTE: myCallback overrides no hooks, so despite its name `early_stop` does
# not stop training early; it is passed through only as a placeholder.
early_stop = myCallback()
callback_list = [early_stop]

# Run the training stage end to end: load settings, prepare the model and the
# data generators, then fit and save. Any exception propagates unchanged, so
# no try/except wrapper is needed (the previous handler only re-raised).
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train(callback_list=callback_list)

[ 2024-11-24 12:42:00,019 ] 21 cnn_classifier_logger : INFO : common : yaml file: config/config.yaml loaded successfully
[ 2024-11-24 12:42:00,027 ] 21 cnn_classifier_logger : INFO : common : yaml file: params.yaml loaded successfully
[ 2024-11-24 12:42:00,030 ] 33 cnn_classifier_logger : INFO : common : Created directory at: artifacts
[ 2024-11-24 12:42:00,035 ] 33 cnn_classifier_logger : INFO : common : Created directory at: artifacts/training


2024-11-24 12:42:00.370762: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Found 60 images belonging to 3 classes.
Found 540 images belonging to 3 classes.


  self._warn_if_super_not_called()


Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 640ms/step - accuracy: 0.5213 - loss: 1.1286

  self._warn_if_super_not_called()


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 770ms/step - accuracy: 0.5233 - loss: 1.1236 - val_accuracy: 0.4167 - val_loss: 5.4974
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 805ms/step - accuracy: 0.7481 - loss: 0.6422 - val_accuracy: 0.4167 - val_loss: 2.9197
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 759ms/step - accuracy: 0.8074 - loss: 0.4963 - val_accuracy: 0.5500 - val_loss: 2.6193
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 720ms/step - accuracy: 0.8466 - loss: 0.3914 - val_accuracy: 0.3333 - val_loss: 6.7188
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 751ms/step - accuracy: 0.8750 - loss: 0.3840 - val_accuracy: 0.4333 - val_loss: 3.2266
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 709ms/step - accuracy: 0.9022 - loss: 0.3081 - val_accuracy: 0.4667 - val_loss: 2.8052
Epoch 7/10
[1m10/34[0m [32m━━━

KeyboardInterrupt: 