<a href="https://colab.research.google.com/github/yachanachoudhary/MLOPS/blob/main/MLPOS_ROGRAM_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Class to store and save the model details along with evaluation metrics
class ModelObject:
    """Record of one tuned model: the estimator plus the metadata kept about it.

    Every constructor argument is stored unchanged under an attribute of the
    same name.
    """

    def __init__(self, model_name, model, params, best_params, evaluation_metrics, version, timestamp):
        """Keep all arguments on the instance without copying or validating them."""
        self.model_name, self.model = model_name, model
        self.params, self.best_params = params, best_params
        self.evaluation_metrics = evaluation_metrics
        self.version, self.timestamp = version, timestamp

    def log_details(self):
        """Return a four-line, newline-terminated text summary of this record."""
        entries = (
            f"Model: {self.model_name} (Version: {self.version}, Timestamp: {self.timestamp})",
            f"Initial Parameters: {self.params}",
            f"Best Parameters after tuning: {self.best_params}",
            f"Evaluation Metrics: {self.evaluation_metrics}",
        )
        # Each entry ends with its own newline, including the last one.
        return "".join(f"{entry}\n" for entry in entries)

    def save(self, save_path):
        """Dump this whole object (not just the estimator) to save_path via joblib."""
        joblib.dump(self, save_path)
        print(f"Model saved at: {save_path}")

# Class for dataset handling
class Dataset:
    """Container for the Iris features/labels and a train/test split of them."""

    def __init__(self):
        """Create an empty container; all six fields start as None."""
        self.data = self.target = None
        self.X_train = self.X_test = None
        self.y_train = self.y_test = None

    def load_data(self):
        """Load Iris through scikit-learn and keep its data and target arrays."""
        bunch = load_iris()
        self.data, self.target = bunch.data, bunch.target

    def preprocess(self):
        """Split data/target into train and test parts (20% test, seed 42)."""
        parts = train_test_split(
            self.data,
            self.target,
            test_size=0.2,
            random_state=42,
        )
        self.X_train, self.X_test, self.y_train, self.y_test = parts

# Class for model selection and tuning
class ModelSelector:
    """Tune a fixed set of classifiers and keep the one with the best test accuracy.

    Attributes:
        models: Mapping of model name to an untuned estimator instance.
        best_model_object: ModelObject for the winner of the most recent
            ``select_model`` call, or None when nothing has been selected.
        version: Counter stamped onto selected models and saved file names;
            starts at 1 and grows by one after every successful save.
    """

    def __init__(self):
        self.models = {
            'RandomForest': RandomForestClassifier(),
            'SVM': SVC(),
            'LogisticRegression': LogisticRegression(max_iter=200)
        }
        self.best_model_object = None
        self.version = 1  # Start from version 1

    def hyperparameter_tuning(self, model, param_grid, X_train, y_train):
        """Grid-search ``param_grid`` for ``model`` using 5-fold cross-validation.

        Returns:
            A ``(best_estimator, best_params)`` tuple taken from the fitted
            GridSearchCV.
        """
        grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_, grid_search.best_params_

    def select_model(self, X_train, y_train, X_test, y_test):
        """Tune every model in ``self.models`` and record the most accurate one.

        Each model is tuned on the training data, then scored with
        ``accuracy_score`` on the test data. The highest-scoring model (the
        earliest one on ties) is wrapped in a ModelObject and stored in
        ``self.best_model_object``.

        Returns:
            The winning ModelObject, or None when ``self.models`` is empty.

        NOTE(review): the winner is picked by accuracy on X_test, so the
        reported test accuracy also served as the selection criterion.
        """
        param_grids = {
            'RandomForest': {'n_estimators': [10, 50, 100], 'max_depth': [3, 5, 7]},
            'SVM': {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']},
            'LogisticRegression': {'C': [0.01, 0.1, 1]}
        }

        # Forget any earlier winner so a stale object is never reported as the
        # outcome of this call.
        self.best_model_object = None
        # Start below every possible accuracy so that a model scoring exactly
        # 0.0 is still recorded instead of leaving best_model_object unset.
        best_score = float("-inf")
        for model_name, model in self.models.items():
            print(f"Tuning {model_name}...")
            tuned_model, best_params = self.hyperparameter_tuning(model, param_grids[model_name], X_train, y_train)

            y_pred = tuned_model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            evaluation_metrics = classification_report(y_test, y_pred, output_dict=True)

            print(f"{model_name} Test Accuracy: {accuracy:.4f}")

            # Strict comparison: on equal accuracy the earlier model is kept.
            if accuracy > best_score:
                best_score = accuracy
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                self.best_model_object = ModelObject(
                    model_name=model_name,
                    model=tuned_model,
                    params=param_grids[model_name],
                    best_params=best_params,
                    evaluation_metrics={"accuracy": accuracy, "classification_report": evaluation_metrics},
                    version=self.version,
                    timestamp=timestamp
                )

        if self.best_model_object is None:
            # Only reachable when there were no candidate models at all.
            print("No candidate models were evaluated; nothing was selected.")
            return None

        print(f"\n✅ Best Model: {self.best_model_object.model_name}")
        return self.best_model_object

    def save_best_model(self):
        """Persist the selected model under ``models/`` and append to the log.

        Does nothing when no model has been selected. Otherwise writes
        ``models/<name>_v<version>_<timestamp>.pkl``, appends the model's
        ``log_details()`` text to ``models/model_log.txt``, and increments
        ``self.version``.
        """
        if self.best_model_object is None:
            return

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs("models", exist_ok=True)

        # Save model with version and timestamp
        filename = f"{self.best_model_object.model_name}_v{self.version}_{self.best_model_object.timestamp}.pkl"
        save_path = os.path.join("models", filename)
        self.best_model_object.save(save_path)

        # Save log
        log_path = os.path.join("models", "model_log.txt")
        with open(log_path, "a", encoding="utf-8") as log_file:
            log_file.write(self.best_model_object.log_details() + "\n\n")

        self.version += 1  # Increment version

# Main AutoML Pipeline
class AutoMLPipeline:
    """Connect a Dataset and a ModelSelector into one load, tune and save run."""

    def __init__(self):
        self.dataset = Dataset()
        self.model_selector = ModelSelector()

    def run(self):
        """Load and split the data, select the best model, then save it.

        Returns:
            The value returned by ``model_selector.select_model`` (the selected
            model record), so callers can inspect the result without reaching
            into ``self.model_selector``.
        """
        print("🚀 Loading and Preprocessing Data...")
        self.dataset.load_data()
        self.dataset.preprocess()

        print("🔍 Selecting the best model...")
        best_model = self.model_selector.select_model(
            self.dataset.X_train, self.dataset.y_train,
            self.dataset.X_test, self.dataset.y_test
        )

        print("💾 Saving the best model...")
        self.model_selector.save_best_model()

        # Hand the selection back instead of discarding it.
        return best_model

# Run the pipeline
# Entry point: builds an AutoMLPipeline and runs it only when this code is
# executed directly (module name "__main__"), not when it is imported.
if __name__ == "__main__":
    pipeline = AutoMLPipeline()
    pipeline.run()

🚀 Loading and Preprocessing Data...
🔍 Selecting the best model...
Tuning RandomForest...
Fitting 5 folds for each of 9 candidates, totalling 45 fits
RandomForest Test Accuracy: 1.0000
Tuning SVM...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
SVM Test Accuracy: 1.0000
Tuning LogisticRegression...
Fitting 5 folds for each of 3 candidates, totalling 15 fits
LogisticRegression Test Accuracy: 1.0000

✅ Best Model: RandomForest
💾 Saving the best model...
Model saved at: models/RandomForest_v1_20250814_065031.pkl
