In [1]:
import os

# Move up one level so the `src` package imported by later cells is reachable
# from the working directory. The guard makes this cell idempotent: if `src`
# is already visible (cell re-run, or kernel started at the project root) the
# directory is left alone instead of climbing one level higher each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [2]:
# Display the current working directory to confirm the chdir above took effect.
%pwd

'/Users/vince/code/projects/aws-mlflow-mlops'

In [3]:
# Config entity
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable (frozen) bundle of settings read by the model-training step."""

    # Directory the fitted model file is written into.
    root_dir: Path
    # CSV file the training data is read from.
    train_data_path: Path
    # CSV file holding the test data.
    test_data_path: Path
    # File name of the saved model, joined onto root_dir.
    model_name: str
    # `alpha` argument passed to ElasticNet.
    alpha: float
    # `l1_ratio` argument passed to ElasticNet.
    l1_ratio: float
    # Name of the column separated from the features and used as the fit target.
    target_column: str

In [4]:
# Configuration manager
from src.aws_mlflow_mlops.constants import *
from src.aws_mlflow_mlops.utils.common import read_yaml, create_directories


class ConfigurationManager:
    """
    Loads the project's YAML settings and builds the configuration object for the model trainer.

    On construction it reads the configuration, parameters, and schema YAML files and calls
    create_directories on the configured artifacts root.

    Attributes:
        config: Parsed configuration file, as returned by read_yaml (accessed via attributes).
        params: Parsed parameters file, as returned by read_yaml.
        schema: Parsed schema file, as returned by read_yaml.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """
        Read the three YAML files and create the artifacts root directory.

        Parameters:
            config_filepath: Location of the configuration YAML file.
            params_filepath: Location of the parameters YAML file.
            schema_filepath: Location of the schema YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Build the ModelTrainerConfig for the training stage.

        Pulls the `model_trainer` section of the configuration, the `ElasticNet` section of
        the parameters, and the `TARGET_COLUMN` section of the schema, calls
        create_directories on the trainer's root directory, and packs the values into a
        ModelTrainerConfig.

        Returns:
            ModelTrainerConfig: root directory, train/test data paths, model file name,
            alpha, l1_ratio, and target column name. The three path fields are returned
            as pathlib.Path objects, matching the types declared on ModelTrainerConfig.
        """
        trainer_section = self.config.model_trainer
        elasticnet_params = self.params.ElasticNet
        target_schema = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        # ModelTrainerConfig declares these three fields as Path, but YAML has no
        # path type, so wrap them here. Path() is a no-op on an existing Path.
        return ModelTrainerConfig(
            root_dir=Path(trainer_section.root_dir),
            train_data_path=Path(trainer_section.train_data_path),
            test_data_path=Path(trainer_section.test_data_path),
            model_name=trainer_section.model_name,
            alpha=elasticnet_params.alpha,
            l1_ratio=elasticnet_params.l1_ratio,
            target_column=target_schema.name,
        )

In [5]:
# Define component
import os

import joblib
import pandas as pd
from sklearn.linear_model import ElasticNet

from src.aws_mlflow_mlops import logger


class ModelTrainer:
    """Fits an ElasticNet model on the training CSV and saves it with joblib."""

    def __init__(self, config: ModelTrainerConfig):
        """
        Parameters:
            config: Settings for this stage (data path, hyperparameters, target
                column, and output location).
        """
        self.config = config

    def train(self):
        """
        Train the ElasticNet model and write it to `root_dir/model_name`.

        Reads the training CSV, separates the target column from the features,
        fits ElasticNet with the configured alpha and l1_ratio (random_state fixed
        at 42), and dumps the fitted estimator with joblib.

        The test CSV is deliberately not loaded here: the previous version read it
        and split it into features/target but never used the result.

        Returns:
            None
        """
        train_data = pd.read_csv(self.config.train_data_path)

        train_x = train_data.drop(self.config.target_column, axis=1)
        # Double brackets keep the target as a single-column DataFrame, as before.
        train_y = train_data[[self.config.target_column]]

        lr = ElasticNet(
            alpha=self.config.alpha, l1_ratio=self.config.l1_ratio, random_state=42
        )
        lr.fit(train_x, train_y)

        joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))

In [6]:
# Pipeline
# Run the training stage: load settings, build the trainer, fit and save the model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # Use a separate name for the trainer so `model_trainer_config` keeps
    # referring to the config object instead of being rebound to a ModelTrainer.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    # logger.exception logs at ERROR level and also records the traceback.
    logger.exception(e)
    # Bare `raise` re-raises the active exception unchanged.
    raise

[2023-11-22 17:42:44,628: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-11-22 17:42:44,629: INFO: common: yaml file: params.yaml loaded successfully]
[2023-11-22 17:42:44,632: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-11-22 17:42:44,632: INFO: common: created directory at: artifacts]
[2023-11-22 17:42:44,633: INFO: common: created directory at: artifacts/model_trainer]
