In [32]:
import os

In [33]:
# Later cells import `src.datascience` and read/write relative paths (config
# files, artifacts), so the working directory has to be the project root.
# The default below is the original checkout location; set the
# DATASCIENCE_PROJECT_ROOT environment variable to run on another machine
# without editing this cell.
PROJECT_ROOT = os.environ.get("DATASCIENCE_PROJECT_ROOT", "f:\\zizo\\MLOPS\\datascienceproject")
os.chdir(PROJECT_ROOT)

In [34]:
# Display the current working directory to confirm the chdir in the previous cell took effect.
%pwd

'f:\\zizo\\MLOPS\\datascienceproject'

In [35]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings for the model-training step.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    from the ``model_trainer`` config section, the ``LSTM`` params section and
    the ``TARGET_COLUMN`` schema entry.
    """
    # Output directory of this step; created before the config object is returned.
    root_dir: Path
    # CSV file loaded with ``pd.read_csv`` to build the training sequences.
    data_path: Path
    # File name joined onto ``root_dir`` to form the model checkpoint path.
    model_name: str
    # Number of consecutive rows that make up one input window.
    sequence_length: int
    # Optimizer identifier used when compiling the model.
    optimizer: str
    # Learning rate taken from the ``LSTM`` params section.
    learning_rate: float
    # Batch size passed to ``model.fit``.
    batch_size: int
    # Maximum number of epochs passed to ``model.fit``.
    epochs: int
    # ``EarlyStopping`` patience while monitoring ``val_loss``.
    patience: int
    # ``name`` field of the ``TARGET_COLUMN`` schema entry.
    target_column: str

In [36]:
from src.datascience.constants import *
from src.datascience.utils.common import read_yaml,create_directories

class ConfigurationManager:
    """Load the project's YAML settings and build typed per-step configs.

    On construction the config, params and schema files are parsed with
    ``read_yaml`` and the ``artifacts_root`` directory named in the config is
    created through ``create_directories``.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """Parse the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the config YAML (stored as ``self.config``).
            params_filepath: Path of the params YAML (stored as ``self.params``).
            schema_filepath: Path of the schema YAML (stored as ``self.schema``).
        """
        yaml_sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        # Files are read in this fixed order: config, params, schema.
        for attr_name, yaml_path in yaml_sources:
            setattr(self, attr_name, read_yaml(yaml_path))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training step.

        Reads the ``model_trainer`` config section, the ``LSTM`` params section
        and the ``TARGET_COLUMN`` schema entry, and creates the step's
        ``root_dir`` as a side effect.

        Returns:
            ModelTrainerConfig: The populated configuration object.
        """
        trainer_section = self.config.model_trainer
        lstm_params = self.params.LSTM
        target_schema = self.schema.TARGET_COLUMN

        # The step's output directory must exist before training writes into it.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model_name=trainer_section.model_name,
            sequence_length=lstm_params.sequence_length,
            optimizer=lstm_params.optimizer,
            learning_rate=lstm_params.learning_rate,
            batch_size=lstm_params.batch_size,
            epochs=lstm_params.epochs,
            patience=lstm_params.patience,
            target_column=target_schema.name,
        )

In [43]:
import pandas as pd
import os
from src.datascience import logger
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


class ModelTrainer:
    """Build sliding-window sequences from a CSV and train a stacked LSTM on them.

    Attributes:
        config: The ``ModelTrainerConfig`` driving data loading and training.
        scaler: The ``MinMaxScaler`` fitted by ``create_sequence_and_training``
            (``None`` until that method has run). Keep it to inverse-transform
            predictions.
        test_x: Held-out input windows from the last
            ``create_sequence_and_training`` call (``None`` before that).
        test_y: Labels matching ``test_x``.
    """

    # Positional label column used before the target became configurable; kept
    # only as a fallback when ``config.target_column`` is not a CSV column.
    _FALLBACK_TARGET_INDEX = 3
    # Leading fraction of the windows used for training; the rest is held out.
    _TRAIN_FRACTION = 0.8

    def __init__(self, config: ModelTrainerConfig):
        """Store the configuration; no data is loaded until training is requested."""
        self.config = config
        self.scaler = None
        self.test_x = None
        self.test_y = None

    def _resolve_target_index(self, df):
        """Return the positional index of the label column in ``df``.

        Uses ``config.target_column`` when it names a column of ``df``;
        otherwise logs a warning and falls back to the legacy position.

        Raises:
            ValueError: If the target is not a column and ``df`` is too narrow
                for the legacy fallback position.
        """
        column_names = list(df.columns)
        target = self.config.target_column
        if target in column_names:
            return column_names.index(target)

        if len(column_names) <= self._FALLBACK_TARGET_INDEX:
            raise ValueError(
                f"Target column {target!r} not found in {self.config.data_path} "
                f"(columns: {column_names})"
            )
        logger.warning(
            "Target column %r not found in %s; falling back to column index %d (%r)",
            target,
            self.config.data_path,
            self._FALLBACK_TARGET_INDEX,
            column_names[self._FALLBACK_TARGET_INDEX],
        )
        return self._FALLBACK_TARGET_INDEX

    def create_sequence_and_training(self):
        """Load the CSV, scale it and cut it into (window, next-step label) pairs.

        Every window holds ``config.sequence_length`` consecutive scaled rows;
        its label is the scaled target value of the row right after the window.
        The leading 80% of the windows are returned for training. The remaining
        windows and the fitted scaler are kept on the instance as ``test_x``,
        ``test_y`` and ``scaler``.

        Returns:
            tuple: ``(train_x, train_y)`` where ``train_x`` has shape
            ``(n_train, sequence_length, n_columns)`` and ``train_y`` has shape
            ``(n_train,)``.

        Raises:
            ValueError: If the CSV has too few rows to form a single window.
        """
        df = pd.read_csv(self.config.data_path)
        sequence_length = self.config.sequence_length

        window_count = len(df) - sequence_length
        if window_count <= 0:
            raise ValueError(
                f"Need more than {sequence_length} rows to build sequences, "
                f"but {self.config.data_path} has only {len(df)}"
            )

        target_index = self._resolve_target_index(df)

        # NOTE(review): the scaler is fitted on every row, including the rows that
        # end up in the held-out split, so test statistics leak into the scaling.
        # Consider fitting on the training portion only.
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(df)

        sequences = np.array(
            [scaled_data[start:start + sequence_length] for start in range(window_count)]
        )
        # Label for the window starting at `start` is row `start + sequence_length`.
        labels = scaled_data[sequence_length:, target_index].copy()

        train_size = int(self._TRAIN_FRACTION * len(sequences))
        train_x, test_x = sequences[:train_size], sequences[train_size:]
        train_y, test_y = labels[:train_size], labels[train_size:]

        # Keep what callers need for evaluation instead of discarding it.
        self.scaler = scaler
        self.test_x = test_x
        self.test_y = test_y

        print("Train X shape:", train_x.shape)
        print("Train Y shape:", train_y.shape)
        print("Test X shape:", test_x.shape)
        print("Test Y shape:", test_y.shape)
        return train_x, train_y

    def Creating_model(self, train_x, train_y):
        """Build, compile and fit the stacked LSTM regressor.

        The best model by ``val_loss`` is checkpointed to
        ``root_dir/model_name``; early stopping restores the best weights.

        Args:
            train_x: Array of shape ``(samples, timesteps, features)``.
            train_y: Array of shape ``(samples,)`` with the regression targets.

        Returns:
            The Keras ``History`` returned by ``model.fit``; the trained model
            is reachable through its ``model`` attribute.
        """
        model = Sequential([
            Input(shape=(train_x.shape[1], train_x.shape[2])),
            LSTM(units=128, return_sequences=True),
            Dropout(0.2),
            LSTM(units=64, return_sequences=True),
            Dropout(0.2),
            LSTM(units=32, return_sequences=False),
            Dropout(0.2),
            Dense(units=1)
        ])

        # Passing only the optimizer name to compile() would use that optimizer's
        # default learning rate and silently ignore the configured one, so the
        # optimizer is instantiated with the configured rate. float() guards
        # against YAML values such as `1e-3` being loaded as strings.
        optimizer = self.config.optimizer
        if isinstance(optimizer, str):
            optimizer = tf.keras.optimizers.get({
                "class_name": optimizer,
                "config": {"learning_rate": float(self.config.learning_rate)},
            })

        model.compile(optimizer=optimizer, loss='mean_squared_error')

        checkpoint_path = os.path.join(self.config.root_dir, self.config.model_name)
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=self.config.patience,
            restore_best_weights=True,
        )
        model_checkpoint = ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
        )

        history = model.fit(
            train_x, train_y,
            epochs=self.config.epochs,
            batch_size=self.config.batch_size,
            validation_split=0.2,  # Use part of the training data as validation
            callbacks=[early_stopping, model_checkpoint]
        )
        return history
    

In [44]:
# Run the training stage end to end: load the configuration, build the
# sequences, then fit the model. Exceptions propagate unchanged so the notebook
# shows the original traceback (the previous try/except only re-raised).
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Separate name for the trainer so `model_trainer_config` keeps referring to
# the configuration object rather than being rebound to the trainer.
model_trainer = ModelTrainer(config=model_trainer_config)
train_x, train_y = model_trainer.create_sequence_and_training()
model_trainer.Creating_model(train_x, train_y)

[2025-04-15 15:04:18,945: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-15 15:04:18,946: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-15 15:04:18,949: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-04-15 15:04:18,949: INFO: common: created directory at: artifacts]
[2025-04-15 15:04:18,950: INFO: common: created directory at: artifacts/model_trainer]
Train X shape: (78748, 10, 9)
Train Y shape: (78748,)
Test X shape: (19688, 10, 9)
Test Y shape: (19688,)
Epoch 1/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - loss: 0.0147 - val_loss: 0.0041
Epoch 2/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0052 - val_loss: 0.0029
Epoch 3/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - loss: 0.0038 - val_loss: 0.0018
Epoch 4/15
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step - loss: 0.0027 - val_