In [1]:
import os
# Step one directory up from the notebook's folder and echo the new working directory.
# Presumably this is so the `src.*` imports and relative config paths below resolve
# from the project root — confirm against the repository layout.
# NOTE(review): this is not idempotent — re-running the cell without restarting the
# kernel moves up one more level each time.
os.chdir("../") # to the parent directory
%pwd

'c:\\Users\\15600\\Desktop\\PY\\kidney-disease-classification-project'

## Entity

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingModelConfig:
    """Read-only bundle of settings consumed by the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    root_dir: Path               # directory created for this stage's outputs
    prepare_base_model: Path     # file the base model is loaded from before training
    trained_model_path: Path     # file the trained model is saved to
    training_data: Path          # directory handed to the dataset class
    params_batch_size: int       # batch size for the train/validation loaders
    params_epochs: int           # number of passes over the training data
    params_learning_rate: float  # learning rate given to the optimizer

## Configuration

In [3]:
from src.KDClassifier.constants import *
from src.KDClassifier.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Load the project YAML files and build stage configuration objects.

    On construction, the main config and the params file are parsed with
    ``read_yaml`` and the top-level artifacts directory named in the config
    is created.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Make sure the shared artifacts folder exists before any stage writes to it.
        artifacts_dir = self.config.artifacts_root
        create_directories([artifacts_dir])

    def get_training_config(self) -> TrainingModelConfig:
        """Assemble the ``TrainingModelConfig`` for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect and returns the
        paths and hyper-parameters wrapped in a frozen dataclass.
        """
        base_section = self.config.prepare_base_model
        stage_section = self.config.model_training

        # The images live in the "kidneyCTscan" folder inside the unzip directory.
        dataset_dir = Path(self.config.data_ingestion.unzip_dir) / "kidneyCTscan"

        create_directories([stage_section.root_dir])

        hyper = self.params
        return TrainingModelConfig(
            root_dir=Path(stage_section.root_dir),
            prepare_base_model=Path(base_section.base_model_path),
            trained_model_path=Path(stage_section.trained_model_path),  # where the trained model is saved
            training_data=dataset_dir,
            params_batch_size=hyper.BATCH_SIZE,
            params_epochs=hyper.EPOCHS,
            params_learning_rate=hyper.LEARNING_RATE,
        )


## Component

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import ViTFeatureExtractor

from src.KDClassifier import logger
from src.KDClassifier.utils.dataloader import KidneyDataset

In [6]:
class Training:
    """Fine-tune the prepared base model on the kidney CT-scan images.

    Expected call order: ``get_training_model`` -> ``train_valid_generator``
    -> ``train``. ``train`` reads ``self.model`` and ``self.train_loader``,
    which only exist after the first two calls.
    """

    def __init__(self, config: "TrainingModelConfig"):
        """Keep the training configuration; nothing is loaded at this point."""
        self.config = config

    def get_training_model(self):
        """Load the prepared base model into ``self.model``.

        The file at ``config.prepare_base_model`` must contain a whole pickled
        module (not a state dict), because ``train`` calls the loaded object
        directly.
        """
        # map_location="cpu" lets a model that was saved on a GPU be loaded on a
        # CPU-only machine; train() moves it to the selected device afterwards.
        # NOTE(review): torch.load unpickles the file — only load trusted files.
        # On PyTorch >= 2.6 the default weights_only=True rejects fully pickled
        # modules; pass weights_only=False there once the source is trusted.
        self.model = torch.load(self.config.prepare_base_model, map_location="cpu")

    def train_valid_generator(self, train_fraction: float = 0.8, seed=None):
        """Build ``self.train_loader`` and ``self.valid_loader``.

        Args:
            train_fraction: Share of the dataset used for training, in (0, 1].
                The remainder becomes the validation split.
            seed: Optional integer. When given, the random split is made
                reproducible; when ``None`` the global torch RNG is used.

        Raises:
            ValueError: If ``train_fraction`` is outside (0, 1].
        """
        if not 0.0 < train_fraction <= 1.0:
            raise ValueError(
                f"train_fraction must be in (0, 1], got {train_fraction!r}"
            )

        # NOTE(review): recent transformers releases deprecate ViTFeatureExtractor
        # in favour of ViTImageProcessor — consider migrating the import.
        feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

        dataset = KidneyDataset(self.config.training_data, feature_extractor)

        train_size = int(train_fraction * len(dataset))
        valid_size = len(dataset) - train_size

        split_kwargs = {}
        if seed is not None:
            split_kwargs["generator"] = torch.Generator().manual_seed(seed)
        train_dataset, valid_dataset = torch.utils.data.random_split(
            dataset, [train_size, valid_size], **split_kwargs
        )

        batch_size = self.config.params_batch_size
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        # NOTE: train() does not consume valid_loader; it is only prepared here.
        self.valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    @staticmethod
    def save_model(model, path):
        """Serialize the whole ``model`` object to ``path`` with ``torch.save``."""
        torch.save(model, path)

    def train(self, checkpoint_every: int = 2):
        """Run the training loop and save the trained model.

        Uses cross-entropy loss and Adam with ``config.params_learning_rate``
        for ``config.params_epochs`` epochs, on CUDA when available.

        Args:
            checkpoint_every: Save the model after every this-many completed
                epochs. The model is additionally always saved after the final
                epoch, so a run never ends without a saved model.

        Returns:
            dict with keys ``"loss"`` and ``"accuracy"``, each a list holding
            one value per epoch (mean batch loss and training accuracy).

        Raises:
            ValueError: If ``checkpoint_every`` is < 1 or the training loader
                yields no batches.
        """
        if checkpoint_every < 1:
            raise ValueError(f"checkpoint_every must be >= 1, got {checkpoint_every!r}")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)
        logger.info(f"training on {device}")

        num_batches = len(self.train_loader)
        if num_batches == 0:
            # Fail early with a clear message instead of a ZeroDivisionError
            # when the per-epoch averages are computed.
            raise ValueError("train_loader is empty; nothing to train on")

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.config.params_learning_rate)

        history = {"loss": [], "accuracy": []}
        num_epochs = self.config.params_epochs

        for epoch in range(num_epochs):
            logger.info(f"epoch {epoch}")
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            for inputs, labels in self.train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # Predicted class = index of the largest logit per sample.
                predicted = outputs.detach().argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            logger.info("calculate epoch loss")
            epoch_loss = running_loss / num_batches
            epoch_acc = correct / total
            history["loss"].append(epoch_loss)
            history["accuracy"].append(epoch_acc)

            logger.info(f'Epoch {epoch + 1}, Loss: {epoch_loss}, Accuracy: {epoch_acc}')

            # The previous condition `epoch // 2` was falsy for epochs 0 and 1,
            # so short runs (e.g. 2 epochs) never saved the model at all.
            completed = epoch + 1
            if completed % checkpoint_every == 0 or completed == num_epochs:
                self.save_model(self.model, self.config.trained_model_path)

        return history


In [7]:
import torch
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [8]:

device  # bare expression: the cell output shows which device was selected

device(type='cuda')

## Pipeline

In [9]:
# Run the training stage end to end: read the configuration, load the base
# model, build the data loaders, then train.
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(training_config)
    training.get_training_model()
    training.train_valid_generator()
    logger.info("start training")
    training.train()

except Exception:
    # Record the failure with its traceback in the project log, then re-raise
    # the same exception untouched (bare `raise` keeps the original traceback).
    logger.exception("model training pipeline failed")
    raise

[2024-02-10 17:37:57,260: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-10 17:37:57,261: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-10 17:37:57,262: INFO: common: created directory at: artifacts]
[2024-02-10 17:37:57,263: INFO: common: created directory at: artifacts/model_training]
[2024-02-10 17:37:57,683: INFO: 4274925046: start training]
[2024-02-10 17:37:57,819: INFO: 1256434790: training on cuda]
[2024-02-10 17:37:57,834: INFO: 1256434790: epoch 0]




[2024-02-10 17:38:07,529: INFO: 1256434790: calculate epoch loss]
[2024-02-10 17:38:07,529: INFO: 1256434790: Epoch 1, Loss: 0.6476555716335064, Accuracy: 0.5952380952380952]
[2024-02-10 17:38:07,529: INFO: 1256434790: epoch 1]
[2024-02-10 17:38:14,696: INFO: 1256434790: calculate epoch loss]
[2024-02-10 17:38:14,700: INFO: 1256434790: Epoch 2, Loss: 0.40632524829180466, Accuracy: 0.8392857142857143]
