In [1]:
import os
# Switch the working directory two levels up from the current one so that the
# relative paths used later resolve from there (presumably the project root --
# the recorded output of this cell shows the resulting directory).
# NOTE(review): the path is relative to the *current* working directory, so
# re-running this cell without restarting the kernel climbs two more levels.
os.chdir('../../')
# Display the working directory that is now in effect.
%pwd

'/home/utpal108/dev/Upwork/Projects/Diabetic-Retinopathy-Prediction'

In [2]:
# Config Entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class MLModelTrainingConfig:
    """Immutable (frozen) settings consumed by the ML model-training step."""
    # Path handed to `save_object` when the best-scoring model is persisted.
    best_ml_model_path: Path
    

In [3]:
from diabeticRetinopathy.constants import *
from diabeticRetinopathy.utils import create_directories, read_yaml

In [4]:
# Configuration Manager
class ConfigurationManager:
    """Reads the project's YAML files and turns their sections into config entities.

    Attributes:
        config: Object returned by ``read_yaml`` for the main configuration file;
            accessed by attribute (``artifacts_root``, ``training``).
        params: Object returned by ``read_yaml`` for the parameters file.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_ml_model_training_config(self) -> MLModelTrainingConfig:
        """Build the model-training config entity from the ``training`` section.

        The section's ``root_dir`` is passed to ``create_directories`` before the
        entity is built.

        Returns:
            MLModelTrainingConfig: Entity whose ``best_ml_model_path`` is the
            ``training.best_ml_model_path`` value wrapped in ``Path``.
        """
        training_section = self.config.training
        create_directories([training_section.root_dir])

        return MLModelTrainingConfig(
            best_ml_model_path=Path(training_section.best_ml_model_path),
        )


In [5]:
from diabeticRetinopathy.utils import save_object
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [6]:
# Component
class ModelTraining:
    """Trains a fixed set of scikit-learn classifiers and saves the most accurate one.

    Attributes:
        config: Training settings; only ``best_ml_model_path`` is read by this class.
    """

    def __init__(self, config: MLModelTrainingConfig):
        """Keep a reference to the training configuration.

        Args:
            config: Settings object providing ``best_ml_model_path``.
        """
        self.config = config

    def _evaluate_model(self, models, X_train, X_test, y_train, y_test):
        """Fit every candidate and score it by accuracy on the evaluation split.

        Args:
            models: Mapping of model name -> unfitted estimator exposing ``fit``
                and ``predict``. The estimators are fitted in place.
            X_train: Features passed to ``fit``.
            X_test: Features passed to ``predict``.
            y_train: Targets passed to ``fit``.
            y_test: Targets compared against the predictions.

        Returns:
            tuple: ``(report, best_model)``. ``report`` maps every model name to
            its accuracy; ``best_model`` is a one-item dict ``{name: accuracy}``
            for the top scorer. On a tie the model listed first in ``models`` wins.

        Raises:
            ValueError: If ``models`` is empty, since there is nothing to select.
        """
        if not models:
            raise ValueError("No candidate models were provided for evaluation.")

        # Evaluate each model by its accuracy on the evaluation split.
        report = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            report[model_name] = accuracy_score(y_test, y_pred)

        # max() returns the first maximal key in insertion order, which keeps the
        # "earliest model wins a tie" behaviour of a strict `<` comparison.
        best_name = max(report, key=report.get)
        return report, {best_name: report[best_name]}

    def initiate_model_training(self, X_train, X_test, y_train, y_test, random_state=None):
        """Train all candidate classifiers, print their scores and save the best one.

        Args:
            X_train: Training features.
            X_test: Evaluation features.
            y_train: Training targets.
            y_test: Evaluation targets.
            random_state: Optional seed forwarded to the randomized estimators
                (decision tree, random forest, gradient boosting). The default
                ``None`` leaves them unseeded, so scores -- and therefore the
                selected model -- may differ between runs.

        Returns:
            None. The selected fitted estimator is written to
            ``self.config.best_ml_model_path`` via ``save_object``.
        """
        # NOTE(review): per the scikit-learn docs, loss='exponential' is only
        # supported for binary targets -- confirm the label column is binary.
        models = {
            'SVC': SVC(kernel='linear', gamma='scale'),
            'DecisionTree': DecisionTreeClassifier(random_state=random_state),
            'RandomForest': RandomForestClassifier(
                criterion='entropy', max_features='sqrt', random_state=random_state
            ),
            'GradientBoosting': GradientBoostingClassifier(
                criterion='squared_error', loss='exponential', random_state=random_state
            ),
            'KNeighbors': KNeighborsClassifier(algorithm='auto', n_neighbors=9, weights='distance'),
        }

        # NOTE(review): the winner is chosen on the same split that produces the
        # reported accuracy, so that figure is optimistic; consider selecting on a
        # validation split or with cross-validation.
        model_report, best_model = self._evaluate_model(
            models=models, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
        )
        print(model_report)
        print(best_model)

        # `best_model` is a one-item {name: score} dict; look up the fitted estimator.
        best_model_name = next(iter(best_model))

        # Save the best model
        save_object(self.config.best_ml_model_path, models[best_model_name])


In [7]:
from diabeticRetinopathy.components.diabetes.data_preprocessing import DataPreprocessing
from diabeticRetinopathy.config import ConfigurationManager

In [8]:
# Pipeline
# No try/except wrapper: a handler that only re-raises adds nothing, so any
# failure surfaces directly with its original traceback.
#
# NOTE: `ConfigurationManager` resolves to the class imported from
# `diabeticRetinopathy.config` in the preceding cell. That import rebinds the
# name defined earlier in this notebook, whose class does not define
# `get_ml_data_preprocessing_config`.
config = ConfigurationManager()
data_preprocessing_config = config.get_ml_data_preprocessing_config()
ml_model_training_config = config.get_ml_model_training_config()

# Run preprocessing; it returns the four splits unpacked below.
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
X_train, X_test, y_train, y_test = data_preprocessing.initiate_data_preprocessing()

# Train the candidate models and save the best-scoring one.
model_training = ModelTraining(config=ml_model_training_config)
model_training.initiate_model_training(X_train, X_test, y_train, y_test)

2024-04-18 00:19:39,404 : diabeticRetinopathy.logger - INFO - YAML file: config/config.yaml loaded successfully
2024-04-18 00:19:39,407 : diabeticRetinopathy.logger - INFO - YAML file: params.yaml loaded successfully
2024-04-18 00:19:39,408 : diabeticRetinopathy.logger - INFO - created directory at: artifacts
2024-04-18 00:19:39,408 : diabeticRetinopathy.logger - INFO - created directory at: artifacts/preprocessor
2024-04-18 00:19:39,409 : diabeticRetinopathy.logger - INFO - created directory at: artifacts/training


<class 'dict'>
{'SVC': 0.7566666666666667, 'DecisionTree': 0.7366666666666667, 'RandomForest': 0.8033333333333333, 'GradientBoosting': 0.7966666666666666, 'KNeighbors': 0.7633333333333333}
{'RandomForest': 0.8033333333333333}
