In [2]:
import os

In [3]:
# Move to the project root so the relative paths used below (config/, artifacts/, src/) resolve.
# Set the PROJECT_ROOT environment variable to run this notebook from another checkout;
# the default keeps the location this notebook was originally written against.
os.chdir(os.environ.get("PROJECT_ROOT", "f:\\zizo\\MLOPS\\datascienceproject"))

In [4]:
%pwd

'f:\\zizo\\MLOPS\\datascienceproject'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings bundle for the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    from three sources: the ``model_trainer`` section of the config file, the
    ``LSTM`` section of the params file, and the ``TARGET_COLUMN`` entry of the
    schema file. Values are assigned as loaded; no ``Path()`` conversion is
    applied when the object is populated.
    """
    # Directory for training artifacts (created before the config is returned).
    root_dir: Path
    # CSV file read with pandas to build the training sequences.
    data_path: Path
    # File name joined onto root_dir to form the model checkpoint path.
    model_name: str
    # Number of consecutive rows in each input window.
    sequence_length: int
    # Optimizer identifier taken from params.LSTM.optimizer.
    optimizer: str
    # Taken from params.LSTM.learning_rate.
    learning_rate: float
    # Passed to model.fit as batch_size.
    batch_size: int
    # Passed to model.fit as epochs.
    epochs: int
    # Passed to the EarlyStopping callback as patience.
    patience: int
    # Taken from schema.TARGET_COLUMN.name.
    target_column: str

In [6]:
from src.datascience.constants import *
from src.datascience.utils.common import read_yaml,create_directories

class ConfigurationManager:
    """Loads the config, params and schema YAML files and builds stage configs."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Create the trainer's output directory and return its settings.

        Combines the ``model_trainer`` config section, the ``LSTM`` params
        section and the ``TARGET_COLUMN`` schema entry into one
        ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer
        lstm_params = self.params.LSTM
        target_schema = self.schema.TARGET_COLUMN

        # The output directory must exist before training writes checkpoints into it.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model_name=trainer_section.model_name,
            sequence_length=lstm_params.sequence_length,
            optimizer=lstm_params.optimizer,
            learning_rate=lstm_params.learning_rate,
            batch_size=lstm_params.batch_size,
            epochs=lstm_params.epochs,
            patience=lstm_params.patience,
            target_column=target_schema.name,
        )

In [None]:
import pandas as pd
import os
from src.datascience import logger
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split


class ModelTrainer:
    """Build sliding-window sequences from a CSV and train a stacked LSTM regressor.

    Every setting (paths, window length, optimizer, training schedule) is read
    from the ``ModelTrainerConfig`` given to the constructor.
    """

    # Label column position used when ``config.target_column`` is not present
    # in the CSV header.
    _FALLBACK_TARGET_INDEX = 3

    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    def _resolve_target_index(self, columns) -> int:
        """Return the positional index of the label column within ``columns``."""
        target = self.config.target_column
        column_names = list(columns)
        if target in column_names:
            return column_names.index(target)
        # Keep working on data whose header does not match the schema, but say so.
        logger.warning(
            "target column %r not found in %s; falling back to column index %d",
            target, self.config.data_path, self._FALLBACK_TARGET_INDEX,
        )
        return self._FALLBACK_TARGET_INDEX

    def create_sequence_and_training(self):
        """Scale the data, cut it into windows and split it chronologically.

        Returns:
            ``(train_x, train_y, test_x, test_y)`` where the ``*_x`` arrays have
            shape ``(n, sequence_length, n_columns)`` and the ``*_y`` arrays hold
            the scaled target value of the row right after each window. The
            first 80% of the windows are the training set.

        Raises:
            ValueError: if the CSV has too few rows to form a single window.
        """
        df = pd.read_csv(self.config.data_path)
        sequence_length = self.config.sequence_length

        if len(df) <= sequence_length:
            raise ValueError(
                f"Need more than {sequence_length} rows to build sequences, "
                f"got {len(df)} from {self.config.data_path}"
            )

        target_index = self._resolve_target_index(df.columns)

        # NOTE(review): the scaler is fitted on every row, including those that
        # end up in the test split, and it is not persisted or returned, so
        # predictions cannot be mapped back to original units later.
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(df)

        window_count = len(scaled_data) - sequence_length
        sequences = np.array(
            [scaled_data[start:start + sequence_length] for start in range(window_count)]
        )
        # The label for window ``start`` is the target value at row ``start + sequence_length``.
        labels = scaled_data[sequence_length:, target_index]

        # Chronological split: no shuffling, the last 20% of windows are held out.
        train_size = int(0.8 * len(sequences))
        train_x, test_x = sequences[:train_size], sequences[train_size:]
        train_y, test_y = labels[:train_size], labels[train_size:]

        print("Train X shape:", train_x.shape)
        print("Train Y shape:", train_y.shape)
        print("Test X shape:", test_x.shape)
        print("Test Y shape:", test_y.shape)
        return train_x,train_y,test_x,test_y


    def Creating_model(self,train_x,train_y):
        """Build, compile and fit the LSTM network, checkpointing the best epoch.

        Args:
            train_x: 3-D array of input windows; its last two dimensions define
                the network's input shape.
            train_y: one target value per window.

        Returns:
            ``(model, history)``: the fitted Keras model and the object
            returned by ``model.fit``.
        """
        model = Sequential([
            Input(shape=(train_x.shape[1], train_x.shape[2])),
            LSTM(units=128, return_sequences=True),
            Dropout(0.2),
            LSTM(units=64, return_sequences=True),
            Dropout(0.2),
            LSTM(units=32, return_sequences=False),
            Dropout(0.2),
            Dense(units=1)
        ])

        # Build the optimizer from its configured name AND learning rate; passing
        # only the name to compile() would silently use the optimizer's default rate.
        optimizer = tf.keras.optimizers.get({
            "class_name": str(self.config.optimizer),
            "config": {"learning_rate": float(self.config.learning_rate)},
        })
        model.compile(optimizer=optimizer, loss='mean_squared_error')

        checkpoint_path = os.path.join(self.config.root_dir, self.config.model_name)
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=self.config.patience,
            restore_best_weights=True,
        )
        model_checkpoint = ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
        )

        history = model.fit(
            train_x, train_y,
            epochs=self.config.epochs,
            batch_size=self.config.batch_size,
            validation_split=0.2,  # Use part of the training data as validation
            callbacks=[early_stopping, model_checkpoint]
        )
        return model, history
    

In [10]:
# Training stage: load settings, build the windowed dataset, fit the model.
# No try/except here: a handler that only re-raises adds nothing but traceback noise.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Keep the trainer under its own name instead of rebinding the config variable.
model_trainer = ModelTrainer(config=model_trainer_config)
train_x, train_y, test_x, test_y = model_trainer.create_sequence_and_training()
# Trailing semicolon keeps the cell from echoing a return value.
model_trainer.Creating_model(train_x, train_y);

[2025-04-15 19:48:46,968: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-15 19:48:46,971: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-15 19:48:46,973: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-04-15 19:48:46,974: INFO: common: created directory at: artifacts]
[2025-04-15 19:48:46,975: INFO: common: created directory at: artifacts/model_trainer]
Train X shape: (78748, 10, 9)
Train Y shape: (78748,)
Test X shape: (19688, 10, 9)
Test Y shape: (19688,)
Epoch 1/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - loss: 0.0150 - val_loss: 0.0042
Epoch 2/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 0.0051 - val_loss: 0.0031
Epoch 3/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - loss: 0.0039 - val_loss: 0.0018
Epoch 4/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - loss: 0.0026 - val_

# Model Evaluation

In [11]:
import os
from getpass import getpass

# MLflow reads its server location and credentials from these environment variables.
os.environ['MLFLOW_TRACKING_URI'] = "https://dagshub.com/zizokosaa/datascienceproject.mlflow"
os.environ['MLFLOW_TRACKING_USERNAME'] = "zizokosaa"

# Never store the access token in the notebook: take it from the environment, or
# prompt for it when it is missing. Any token that was ever committed to this
# notebook should be treated as leaked and rotated.
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass("MLflow tracking password / access token: ")

In [12]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelEvaluationConfig:
    """Settings bundle for the model-evaluation stage.

    Instances are built by ``ConfigurationManager.get_model_evaluation_config``
    from the ``model_evaluation`` config section, the ``LSTM`` params section
    and the ``TARGET_COLUMN`` schema entry.
    """
    # Directory for evaluation artifacts (created before the config is returned).
    root_dir: Path
    # Saved model file loaded with Keras ``load_model``.
    model_path: Path
    # The params.LSTM section; logged to MLflow with ``mlflow.log_params``.
    all_params: dict
    # Destination of the metrics JSON written via ``save_json``.
    metric_file_name: Path
    # Taken from schema.TARGET_COLUMN.name.
    target_column: str
    # MLflow server URI; ModelEvaluation passes it to ``mlflow.set_registry_uri``.
    mlflow_uri: str

In [13]:
from src.datascience.constants import *
from src.datascience.utils.common import read_yaml,create_directories,save_json

class ConfigurationManager:
    """Loads the config, params and schema YAML files for the evaluation stage.

    This cell redefines the ``ConfigurationManager`` declared earlier in the
    notebook: once it runs, the name refers to this class, which only offers
    ``get_model_evaluation_config``.
    """

    # Used when the MLFLOW_TRACKING_URI environment variable is not set.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/zizokosaa/datascienceproject.mlflow"

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory."""
        self.config=read_yaml(config_filepath)
        self.params=read_yaml(params_filepath)
        self.schema=read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Create the evaluation output directory and return its settings.

        The MLflow URI is taken from the ``MLFLOW_TRACKING_URI`` environment
        variable so it stays in sync with the credentials configured there;
        ``DEFAULT_MLFLOW_URI`` is used when the variable is absent.
        """
        config = self.config.model_evaluation
        params = self.params.LSTM
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir = config.root_dir,
            model_path = config.model_path,
            all_params=params,
            metric_file_name=config.metric_file_name,
            target_column=schema.name,
            mlflow_uri=os.environ.get("MLFLOW_TRACKING_URI", self.DEFAULT_MLFLOW_URI)
        )

        return model_evaluation_config

In [22]:
import pandas as pd
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from urllib.parse import urlparse
import mlflow
import mlflow.keras
import numpy as np
from tensorflow.keras.models import load_model


class ModelEvaluation:
    """Score a saved Keras model on held-out data and record the run in MLflow."""

    def __init__(self,config: ModelEvaluationConfig):
        self.config = config

    def eval_metrics(self,actual,pred):
        """Return ``(rmse, mae, r2)`` for the given targets and predictions.

        Results are converted to built-in ``float`` so they can always be
        written to JSON, whatever numpy scalar type the metric functions return.
        """
        rmse = float(np.sqrt(mean_squared_error(actual,pred)))
        mae = float(mean_absolute_error(actual,pred))
        r2 = float(r2_score(actual,pred))
        return rmse, mae, r2

    def log_into_mlflow(self,test_x,test_y):
        """Evaluate the model on ``(test_x, test_y)`` and log everything to MLflow.

        Writes the metrics to ``config.metric_file_name``, logs the parameters
        and metrics to a new MLflow run, and logs the model itself (registered
        as ``LSTMModel`` unless the tracking store is a local file store).

        NOTE(review): metrics are computed on the values as passed in; if
        ``test_y`` comes from the training cell's scaled data, they are in
        scaled units. Confirm this is what should be reported.
        """
        model = load_model(self.config.model_path)

        # Point both tracking and the registry at the configured server. Setting
        # only the registry URI leaves run tracking dependent on whatever the
        # MLFLOW_TRACKING_URI environment variable happens to contain.
        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            predictions = model.predict(test_x)

            (rmse,mae,r2) = self.eval_metrics(test_y,predictions)

            # Saving metrics as local
            scores = {"rmse": rmse, "mae": mae,"r2":r2}
            save_json(path = Path(self.config.metric_file_name),data=scores)

            mlflow.log_params(self.config.all_params)

            for metric_name, metric_value in scores.items():
                mlflow.log_metric(metric_name, metric_value)

            # Model registry does not work with file store
            log_model_kwargs = {}
            if tracking_url_type_store != "file":
                log_model_kwargs["registered_model_name"] = "LSTMModel"
            mlflow.keras.log_model(model, "model", **log_model_kwargs)

In [24]:
# Evaluation stage: relies on test_x / test_y produced by the training cell above.
# No try/except here: a handler that only re-raises adds nothing but traceback noise.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.log_into_mlflow(test_x,test_y)

[2025-04-15 22:31:13,785: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-15 22:31:13,787: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-15 22:31:13,789: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-04-15 22:31:13,790: INFO: common: created directory at: artifacts]
[2025-04-15 22:31:13,792: INFO: common: created directory at: artifacts/model_evaluation]
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[2025-04-15 22:31:17,907: INFO: common: json file saved at: artifacts\model_evaluation\metrics.json]


Registered model 'LSTMModel' already exists. Creating a new version of this model...
2025/04/15 22:31:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LSTMModel, version 3
Created version '3' of model 'LSTMModel'.


🏃 View run popular-dolphin-623 at: https://dagshub.com/zizokosaa/datascienceproject.mlflow/#/experiments/0/runs/4b75be8df80b4b309ade4ded0544dd53
🧪 View experiment at: https://dagshub.com/zizokosaa/datascienceproject.mlflow/#/experiments/0
