In [1]:
import os

# Later cells resolve `config/config.yaml` (and the `artifacts/` tree) relative
# to the working directory, so the kernel has to sit in the project root.
# Only climb one level when that file is not already visible from here: an
# unconditional `os.chdir('../')` moves one directory higher on every re-run
# of this cell and silently breaks every relative path afterwards.
# NOTE(review): assumes `config/config.yaml` exists in the project root and
# not in the notebook's own directory - confirm against the repo layout.
if not os.path.isfile(os.path.join('config', 'config.yaml')):
    os.chdir('../')

# Last expression of the cell: display the directory the notebook now runs in.
os.getcwd()

'/home/gfspet/ml-projects/kidney-disease'

In [2]:
import sys
from dataclasses import dataclass
from pathlib import Path 

# Make the project's `src/` package directory importable. The location is
# derived from the current working directory (expected to be the project root
# at this point) instead of a hard-coded absolute home-directory path, so the
# notebook also runs on other machines and checkouts. The membership check
# keeps re-runs of this cell from appending duplicate entries.
src_dir = str(Path.cwd() / 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# The star import is kept because it supplies CONFIG_FILE_PATH and
# PARAMS_FILE_PATH, which are used as default arguments below.
from cnn_classifier.constants import * 
from cnn_classifier.utils.common import read_yaml, create_directories


@dataclass(frozen=True)
class TrainingConfig: 
    """Immutable bundle of paths and hyperparameters for the training stage.

    Instances are assembled by ``ConfigurationManager.get_training_config``
    from the YAML config and params files and consumed by ``Training``.
    """
    # Directory for the training stage; created when the config is built.
    root_dir: Path
    # Destination file handed to the ModelCheckpoint callback.
    trained_model_path: Path
    # Saved model file that is loaded as the starting point for training.
    used_model_path: Path
    # Image directory passed to `flow_from_directory` for both subsets.
    training_data: Path
    # Number of epochs passed to `model.fit`.
    params_epoch: int
    # Batch size used by the training and validation generators.
    params_batch_size: int 
    # When true, the training generator applies rotation/flip/zoom augmentation.
    params_augmentation: bool
    # All but the last element are used as the generators' `target_size`
    # (presumably [height, width, channels] - confirm against params.yaml).
    params_image_size: list
    # Learning rate given to the Adam optimizer.
    params_learning_rate: float
    
class ConfigurationManager:
    """Reads the project's YAML config/params files and builds stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the YAML file holding paths/sections.
            params_filepath: Path of the YAML file holding hyperparameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the ``TrainingConfig`` for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A frozen ``TrainingConfig`` populated from the ``training``,
            ``pretrained_base_model`` and ``data_ingestion`` config sections
            plus the hyperparameters file.
        """
        training_section = self.config.training
        base_model_section = self.config.pretrained_base_model
        hyperparams = self.params

        # Images are read from the `data` sub-directory of the unzip location.
        data_dir = Path(self.config.data_ingestion.unzip_dir) / 'data'

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            used_model_path=Path(base_model_section.used_model_path),
            training_data=data_dir,
            params_epoch=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_learning_rate=hyperparams.LEARNING_RATE,
        )

In [3]:
import tensorflow as tf


class Training: 
    """Training stage: load the prepared model, build data generators, fit.

    Expected call order is ``get_base_model()``, then
    ``train_valid_generator()``, then ``train()``.
    """

    def __init__(self, config: TrainingConfig):
        """Store the stage configuration.

        Args:
            config: Paths and hyperparameters for this training run.
        """
        self.config = config
        # Filled in by get_base_model() / train_valid_generator(). Initialised
        # here so train() can detect a skipped step and fail with a clear
        # message instead of an AttributeError.
        self.model = None
        self.train_generator = None
        self.validation_generator = None
        
    def get_base_model(self):
        """Load the saved model from ``used_model_path`` and compile it.

        The model is compiled with Adam (configured learning rate),
        categorical cross-entropy loss and an accuracy metric.
        """
        self.model = tf.keras.models.load_model(
            self.config.used_model_path
        )
        
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.params_learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy']
        )
    
    def train_valid_generator(self):
        """Create ``self.train_generator`` and ``self.validation_generator``.

        Both generators read from ``config.training_data`` and select their
        subset through ``validation_split = 0.2``. Pixel values are rescaled
        by 1/255. Augmentation (rotation, horizontal flip, zoom) is applied to
        the training subset only, and only when ``params_augmentation`` is
        truthy.
        """
        datagenerator_kwargs = dict(
            rescale = 1./255,
            validation_split = 0.2
        )
        
        dataflow_kwargs = dict(
            # Drop the last element of the configured image size
            # (presumably the channel count - confirm against params.yaml).
            target_size = self.config.params_image_size[:-1],
            batch_size = self.config.params_batch_size
            # interpolation = 'nearest'
        )
        
        valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )
        
        # Validation data is not shuffled, so its order is stable across runs.
        self.validation_generator = valid_datagen.flow_from_directory(
            directory=self.config.training_data,
            shuffle=False,
            subset='validation',
            class_mode='categorical',
            **dataflow_kwargs
        )
        
        if self.config.params_augmentation:
            train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=10,
                horizontal_flip=True,
                # width_shift_range=0.2,
                # height_shift_range=0.2,
                # shear_range=0.2,
                zoom_range=0.1,
                **datagenerator_kwargs
                )
        else: 
            # No augmentation requested: reuse the plain rescaling generator.
            train_datagen = valid_datagen
        
        self.train_generator = train_datagen.flow_from_directory(
            directory=self.config.training_data,
            shuffle=True,
            subset='training',
            class_mode='categorical',
            **dataflow_kwargs
        ) 
        
    def train(self):
        """Fit the model, checkpoint the best epoch, and report accuracy.

        The checkpoint callback writes the model to ``trained_model_path``
        whenever ``val_accuracy`` improves. After fitting, the in-memory model
        (the weights after the last epoch, not a reloaded checkpoint) is
        evaluated on the validation generator and its accuracy is printed.

        Raises:
            RuntimeError: If ``get_base_model()`` or
                ``train_valid_generator()`` has not been called first.
        """
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded: call get_base_model() before train()."
            )
        if self.train_generator is None or self.validation_generator is None:
            raise RuntimeError(
                "Data generators are not built: call train_valid_generator() before train()."
            )
        
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            filepath=self.config.trained_model_path,
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        )
        
        callback = [checkpoint]
        
        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epoch,
            validation_data=self.validation_generator,
            verbose=1,
            callbacks=callback
        )
        
        # evaluate() yields the loss followed by the compiled 'accuracy' metric.
        # The data is the validation subset, so it is reported as such rather
        # than as a "test" score.
        _, val_acc = self.model.evaluate(self.validation_generator)
        print(f"Validation Accuracy: {val_acc:.2f}")
    
    @staticmethod
    def save_model(path: Path, model: tf.keras.Model): 
        """Save ``model`` to ``path``.

        Args:
            path: Destination file for the saved model.
            model: The Keras model to write out.
        """
        model.save(path)

2024-11-24 13:21:06.389788: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-24 13:21:06.438204: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-24 13:21:06.517333: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732425666.623209  424054 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732425666.647198  424054 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-24 13:21:06.781142: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [4]:
# Run the training stage end to end: build the config, load and compile the
# model, create the data generators, then fit.
# The former `try: ... except Exception as e: raise e` wrapper re-raised
# every error unchanged, so it is dropped; exceptions still propagate to the
# notebook exactly as before, with one fewer traceback frame.
config = ConfigurationManager() 
training_config = config.get_training_config()
training = Training(config=training_config) 
training.get_base_model()
training.train_valid_generator()
training.train()

[ 2024-11-24 13:21:10,955 ] 21 cnn_classifier_logger : INFO : common : yaml file: config/config.yaml loaded successfully
[ 2024-11-24 13:21:10,969 ] 21 cnn_classifier_logger : INFO : common : yaml file: params.yaml loaded successfully
[ 2024-11-24 13:21:10,985 ] 33 cnn_classifier_logger : INFO : common : Created directory at: artifacts
[ 2024-11-24 13:21:10,986 ] 33 cnn_classifier_logger : INFO : common : Created directory at: artifacts/training


2024-11-24 13:21:11.136681: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Found 120 images belonging to 3 classes.
Found 480 images belonging to 3 classes.


  self._warn_if_super_not_called()


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 597ms/step - accuracy: 0.5682 - loss: 1.1199

  self._warn_if_super_not_called()



Epoch 1: val_accuracy improved from -inf to 0.48333, saving model to artifacts/training/final_model.keras
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 768ms/step - accuracy: 0.5715 - loss: 1.1098 - val_accuracy: 0.4833 - val_loss: 2.5980
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 295ms/step - accuracy: 0.3970 - loss: 2.8465
Test Accuracy: 0.48
