In [1]:
import os 
# Show the kernel's current working directory before the next cell changes it.
%pwd

'u:\\mlflow-dvc-cancer\\research'

In [2]:
os.chdir("../")
%pwd

'u:\\mlflow-dvc-cancer'

In [12]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of the paths and hyper-parameters used by the training stage.

    The field order defines the positional constructor, so it must not be
    rearranged.
    """

    # --- locations (taken from config.yaml) ---
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path  # from artifacts/prepare_base_model/updated_base_model
    training_data: Path  # from artifacts/data_ingestion

    # --- hyper-parameters (taken from params.yaml) ---
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list
     

In [13]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [14]:
# Entity 
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects from them."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path of the config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: path of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the TrainingConfig for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            TrainingConfig whose paths come from the config YAML and whose
            hyper-parameters come from the params YAML.
        """
        training_section = self.config.training  # "training" section of the config YAML
        base_model_section = self.config.prepare_base_model  # "prepare_base_model" section
        hyperparams = self.params  # contents of the params YAML

        # The images live in a fixed sub-folder of the data-ingestion unzip directory.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir,
            "Chest-CT-Scan-data",
        )

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        # Wrap every location in Path; hyper-parameters are passed through untouched.
        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )

In [15]:
import os 
import time
import tensorflow as tf 
from zipfile import ZipFile 
import urllib.request as request 

In [16]:
class Training:
    """Runs the training stage: load the base model, build the data generators, fit, save.

    The methods are meant to be called in order -- ``get_base_model()``,
    ``train_valid_generator()``, then ``train()`` -- because each one stores
    attributes on the instance that ``train()`` reads.
    """

    def __init__(self,config:TrainingConfig):
        """Keep the stage settings; nothing is loaded until the methods are called."""
        self.config = config

    def get_base_model(self):
        """Load the model saved at ``config.updated_base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both generators read from ``config.training_data`` and share the same
        rescaling and validation split. Augmentation is applied to the
        training generator only, and only when ``config.params_is_augmentation``
        is truthy.
        """
        # Reference:
        # https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

        # Keyword arguments shared by every ImageDataGenerator built here.
        datagenerator_kwargs = dict(
            rescale=1./255,  # multiply every pixel value by 1/255
            validation_split=0.2  # fraction of the images reserved for the "validation" subset
        )

        # Keyword arguments shared by both flow_from_directory calls.
        dataflow_kwargs = dict(
            # Drop the last element of IMAGE_SIZE, leaving the first two
            # (presumably height and width, the last being channels -- confirm in params.yaml).
            target_size= self.config.params_image_size[:-1],
            batch_size= self.config.params_batch_size,
            interpolation="bilinear",  # resampling method used when resizing images
        )

        # The validation generator is never augmented.
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory= self.config.training_data,
            subset="validation",
            shuffle=False,  # keep validation batches in a fixed order
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            # Random geometric transforms, applied to training images only.
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True,
                **datagenerator_kwargs
                )
        else:
            # No augmentation requested: reuse the plain generator.
            train_datagenerator = valid_datagenerator

        # The remaining images (the "training" subset) feed the fit loop.
        self.train_generator = train_datagenerator.flow_from_directory(
            directory= self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
            )

    @staticmethod
    def save_model(path: Path, model:tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)

    @staticmethod
    def _steps_for(generator) -> int:
        """Return the number of full batches in ``generator``, but never less than 1.

        Floor division alone gives 0 when the subset holds fewer images than
        one batch, which would mean no batches are run for that subset.
        """
        return max(1, generator.samples // generator.batch_size)

    def train(self):
        """Fit ``self.model`` on the generators, then save it to ``config.trained_model_path``.

        Requires ``get_base_model()`` and ``train_valid_generator()`` to have
        been called first. Stores the step counts it uses on
        ``self.steps_per_epoch`` and ``self.validation_steps``.
        """
        # A trailing partial batch is not counted, but each subset gets at least one step.
        self.steps_per_epoch = self._steps_for(self.train_generator)
        self.validation_steps = self._steps_for(self.valid_generator)

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_data=self.valid_generator,
            validation_steps=self.validation_steps,
        )

        # Save the fitted model.
        self.save_model(
            path= self.config.trained_model_path,
            model= self.model)

In [17]:
try:
    # Build the stage settings from the YAML files.
    config = ConfigurationManager()
    training_config = config.get_training_config()

    # Run the stage end to end.
    training = Training(config=training_config)
    training.get_base_model()         # load the updated base model
    training.train_valid_generator()  # build the train / validation generators
    training.train()                  # fit, then save the trained model
except Exception:
    # Nothing to clean up here: let the error propagate unchanged.
    raise
    
    

[2024-07-11 17:04:58,467: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-11 17:04:58,473: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-11 17:04:58,476: INFO: common: created directory at: artifacts]
[2024-07-11 17:04:58,479: INFO: common: created directory at: artifacts\training]
Found 68 images belonging to 2 classes.
Found 275 images belonging to 2 classes.
