In [1]:
import os 

In [2]:
%pwd

'd:\\Maintenance-cost-reduction\\research'

In [3]:
# Move one level up from the notebook's folder so the working directory is the
# project root (the `src` package is imported from there by the next cell).
# Guarded so that re-running this cell does not keep climbing the directory
# tree: once a `src` directory is visible from the cwd, nothing happens.
# NOTE(review): assumes the notebook folder itself has no `src` sub-directory — confirm.
if not os.path.isdir('src'):
     os.chdir('../')

In [4]:
%pwd

'd:\\Maintenance-cost-reduction'

In [24]:
from dataclasses import dataclass
from src.constant import *
from pathlib import Path
import pickle 
from src.exception.exception import CustomException
from src.utils.common import load_yaml,create_directories,save_model


In [6]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of the file-system locations used by the training stage."""

    # CSV file loaded as the training set.
    train_path: Path
    # CSV file loaded as the evaluation set.
    test_path: Path
    # Directory into which the selected model is written.
    model_path: Path

In [7]:
class ConfrigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects.

    The (misspelt) class name is kept unchanged because callers refer to it.
    """

    def __init__(self,
                 config_filepath=CONFIG_PATH,
                 params_filepath=PARAMS_PATH,
                 schema_filepath=SCHEMA_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = load_yaml(config_filepath)
        self.params = load_yaml(params_filepath)
        self.schema = load_yaml(schema_filepath)
        create_directories([Path(self.config['artifacts_root'])])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the training-stage configuration from the `model_trainer` section.

        Returns:
            ModelTrainerConfig whose fields are real ``Path`` objects, matching
            the dataclass annotations (the YAML values are converted here
            instead of being passed through as loaded).
        """
        section = self.config.model_trainer

        return ModelTrainerConfig(
            train_path=Path(section.train_path),
            test_path=Path(section.test_path),
            model_path=Path(section.model_path),
        )
          

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import  SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import  AdaBoostClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report,accuracy_score
import os 
import sys 
import pandas as pd 
import numpy as np 

In [25]:


class ModelTrainer:
    """Trains candidate classifiers, selects the most accurate one and pickles it.

    The methods form a chain in which every step calls the previous one
    (``save_model_best`` -> ``get_best_model`` -> ``model_train`` ->
    ``standard_scale`` -> ``split_data`` -> ``read_data``), so calling any of
    them re-reads the CSV files from disk.

    Every method re-raises any failure wrapped in ``CustomException``.
    """

    # Name of the label column in both CSV files.
    TARGET_COLUMN = 'failure'
    # File name of the pickled model inside ``config.model_path``.
    MODEL_FILENAME = 'model.pkl'

    def __init__(self, config):
        """Store the configuration; it must expose ``train_path``, ``test_path``
        and ``model_path``."""
        self.config = config

    def read_data(self):
        """Load the training and test CSV files.

        Returns:
            Tuple ``(train_data, test_data)`` of DataFrames.
        """
        try:
            train_data = pd.read_csv(self.config.train_path)
            test_data = pd.read_csv(self.config.test_path)
            return train_data, test_data
        except Exception as e:
            raise CustomException(e, sys)

    def split_data(self):
        """Separate the features from the ``failure`` label in both data sets.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)``.
        """
        try:
            train_data, test_data = self.read_data()
            X_train = train_data.drop(columns=[self.TARGET_COLUMN])
            y_train = train_data[self.TARGET_COLUMN]
            X_test = test_data.drop(columns=[self.TARGET_COLUMN])
            y_test = test_data[self.TARGET_COLUMN]
            return X_train, X_test, y_train, y_test
        except Exception as e:
            raise CustomException(e, sys)

    def standard_scale(self):
        """Standardise the features with a scaler fitted on the training set only.

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
        """
        try:
            X_train, X_test, y_train, y_test = self.split_data()
            # NOTE(review): the fitted scaler is local to this method and is
            # never persisted, while the saved model is trained on scaled
            # features. Confirm that whatever loads the pickle applies the
            # same scaling.
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            return X_train_scaled, X_test_scaled, y_train, y_test
        except Exception as e:
            raise CustomException(e, sys)

    def model_train(self, models):
        """Fit every candidate and report its accuracy on the test set.

        Args:
            models: Mapping of display name -> estimator offering ``fit`` and
                ``predict``. The estimators are fitted in place.

        Returns:
            Tuple ``(model_accuracies, model_objects, X_train_scaled, y_train)``
            where the first two are dicts keyed by the names in ``models``.
        """
        model_accuracies = {}
        model_objects = {}
        try:
            X_train_scaled, X_test_scaled, y_train, y_test = self.standard_scale()
            for model_name, model in models.items():
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_test_scaled)
                accuracy = accuracy_score(y_test, y_pred)
                model_accuracies[model_name] = accuracy
                model_objects[model_name] = model
                print(f"{model_name}: Accuracy score is: {accuracy}")
                print(confusion_matrix(y_test, y_pred))
                print("="*40)
            return model_accuracies, model_objects, X_train_scaled, y_train
        except Exception as e:
            raise CustomException(e, sys)

    def get_best_model(self, models):
        """Train all candidates and pick the one with the highest test accuracy.

        On a tie, the candidate that comes first in ``models`` wins.
        NOTE(review): selection uses the test set itself, so the reported
        accuracy of the winner is an optimistic estimate.

        Returns:
            Tuple ``(best_model, best_accuracy, X_train_scaled, y_train)``;
            ``best_model`` is the already fitted estimator.
        """
        try:
            model_accuracies, model_objects, X_train_scaled, y_train = self.model_train(models)
            best_model_name = max(model_accuracies, key=model_accuracies.get)
            best_model = model_objects[best_model_name]
            best_accuracy = model_accuracies[best_model_name]
            print(f"Best Model: {best_model_name}")
            return best_model, best_accuracy, X_train_scaled, y_train
        except Exception as e:
            raise CustomException(e, sys)

    def save_model_best(self, models):
        """Select the best candidate and pickle it to ``<model_path>/model.pkl``.

        The estimator is saved exactly as it was evaluated. It is not fitted a
        second time: that would repeat the training work and, for estimators
        with randomised training, would store a model different from the one
        whose accuracy was reported.

        Args:
            models: Mapping of display name -> estimator (see ``model_train``).
        """
        try:
            best_model, _best_accuracy, _X_train_scaled, _y_train = self.get_best_model(models)

            # A relative model_path is resolved against the current working directory.
            model_directory = os.path.join(os.getcwd(), self.config.model_path)
            print(f"Model directory: {model_directory}")  # Debug print

            if not os.path.exists(model_directory):
                print(f"Creating directory: {model_directory}")  # Debug print
            else:
                print(f"Directory already exists: {model_directory}")  # Debug print
            # exist_ok avoids failing if the directory appears between the
            # check above and the creation.
            os.makedirs(model_directory, exist_ok=True)

            model_path = os.path.join(model_directory, self.MODEL_FILENAME)
            print(f"Saving model to: {model_path}")  # Debug print

            with open(model_path, 'wb') as f:
                pickle.dump(best_model, f)
            print("Model saved successfully!")  # Debug print

        except Exception as e:
            raise CustomException(e, sys)

In [26]:
# Candidate classifiers to compare, keyed by the name printed for each one.
# A fixed seed is given to the estimators whose training is randomised so that
# re-running the notebook reproduces the same scores and the same selected model.
RANDOM_STATE = 42
models = {
     'LogisticRegression': LogisticRegression(),
     'SVC': SVC(),
     'DecisionTreeClassifier': DecisionTreeClassifier(random_state=RANDOM_STATE),
     'GaussianNB': GaussianNB(),
     'RandomForestClassifier': RandomForestClassifier(random_state=RANDOM_STATE),
     'KNeighborsClassifier': KNeighborsClassifier(),
     'AdaBoostClassifier': AdaBoostClassifier(learning_rate=1, random_state=RANDOM_STATE),
}

In [27]:
# Run the model-training stage end to end: load the configuration, train and
# compare the candidate models, then pickle the best one.
# (No try/except here: catching an exception only to re-raise it unchanged adds
# nothing, and save_model_best returns None so its result is not kept.)
config = ConfrigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(model_trainer_config)
model_trainer.save_model_best(models)

[2024-03-13 17:50:54,446: INFO: common: start the loading  the config\config.yaml file ]
[2024-03-13 17:50:54,454: INFO: common: Load the config\config.yaml successfully]
[2024-03-13 17:50:54,455: INFO: common: start the loading  the params.yaml file ]
[2024-03-13 17:50:54,458: INFO: common: Load the params.yaml successfully]
[2024-03-13 17:50:54,459: INFO: common: start the loading  the schema.yaml file ]
[2024-03-13 17:50:54,461: INFO: common: Load the schema.yaml successfully]
[2024-03-13 17:50:54,462: INFO: common: Enter into the create directores in method]
[2024-03-13 17:50:54,463: INFO: common: artifacts is create >>>>]
LogisticRegression: Accuracy score is: 0.8694706838902998
[[24937  2281]
 [ 4863 22650]]
SVC: Accuracy score is: 0.9793535656209461
[[26752   466]
 [  664 26849]]
DecisionTreeClassifier: Accuracy score is: 0.9989220003288812
[[27176    42]
 [   17 27496]]
GaussianNB: Accuracy score is: 0.8575213316036615
[[25338  1880]
 [ 5918 21595]]
RandomForestClassifier: Accu