In [1]:
# Show the kernel's starting directory; the recorded output below ends in
# `research`, i.e. the notebook's own folder.
import os 
%pwd

'd:\\My Workspace\\proj_file\\End-To-End-Wine-Quality-Pred\\research'

In [2]:
os.chdir("../")
%pwd

'd:\\My Workspace\\proj_file\\End-To-End-Wine-Quality-Pred'

In [7]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DatatransformationConfig:
    """Immutable settings for the data-transformation stage.

    Both fields are normalised to ``pathlib.Path`` when the instance is
    created, so callers may pass plain strings (for example values read from
    a YAML file) and the stored values still match the declared ``Path`` type.
    """

    root_dir: Path   # directory that receives train.csv / test.csv
    data_path: Path  # CSV file the stage reads as its input

    def __post_init__(self):
        # frozen=True forbids normal attribute assignment, so the coercion has
        # to go through object.__setattr__.
        object.__setattr__(self, "root_dir", Path(self.root_dir))
        object.__setattr__(self, "data_path", Path(self.data_path))


In [8]:
from src.constants import *
from src.utils.common import read_yaml, create_directories


class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load config, params and schema via ``read_yaml``.

        After loading, ``create_directories`` is called with the
        ``artifacts_root`` entry of the main config.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DatatransformationConfig:
        """Return the ``data_transformation`` section as a config object.

        ``create_directories`` is called with the section's ``root_dir``
        before the object is built.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DatatransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

In [9]:
import os
import logging
from sklearn.model_selection import train_test_split
import pandas as pd

class DataTransformation:
    """Splits the configured CSV into stratified train and test CSV files."""

    def __init__(self, config: DatatransformationConfig):
        """Store the stage config (``data_path`` is read, ``root_dir`` is written to)."""
        self.config = config

    def preprocessing(self, target_col="quality", test_size=0.2, random_state=2):
        """Read the data, split it, and write ``train.csv`` / ``test.csv``.

        Args:
            target_col: Column used to stratify the split.
            test_size: Passed through to ``train_test_split``.
            random_state: Seed passed through to ``train_test_split``.

        The defaults reproduce the previously hard-coded values, so calling
        ``preprocessing()`` with no arguments behaves as before.

        Raises:
            KeyError: If ``target_col`` is not a column of the loaded data.
        """
        df = pd.read_csv(self.config.data_path)

        # Fail with a message that names the file, rather than a bare
        # KeyError('quality') from the column lookup.
        if target_col not in df.columns:
            raise KeyError(
                f"target column {target_col!r} not found in {self.config.data_path}"
            )

        train, test = train_test_split(
            df,
            test_size=test_size,
            random_state=random_state,
            stratify=df[target_col],
        )

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)

        summaries = (train.shape, test.shape, train.head(), test.head())

        logging.info("Splited data into training and test sets")
        for summary in summaries:
            logging.info(summary)

        for summary in summaries:
            print(summary)

In [10]:
import sys 
from src.exception import CustomException
try:
    # Build the stage config from the YAML files, then run the split.
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.preprocessing()
except Exception as e:
    # Re-raise as the project's exception type; `from e` records the original
    # error explicitly as __cause__ of the wrapper.
    raise CustomException(e, sys) from e


[ 2024-06-22 08:47:01,290 ] 33 root - INFO - yaml file: config\config.yaml loaded successfully
[ 2024-06-22 08:47:01,292 ] 33 root - INFO - yaml file: params.yaml loaded successfully
[ 2024-06-22 08:47:01,295 ] 33 root - INFO - yaml file: schema.yaml loaded successfully
[ 2024-06-22 08:47:01,296 ] 53 root - INFO - created directory at: artifacts
[ 2024-06-22 08:47:01,297 ] 53 root - INFO - created directory at: artifacts/data_transformation
[ 2024-06-22 08:47:01,318 ] 20 root - INFO - Splited data into training and test sets
[ 2024-06-22 08:47:01,319 ] 21 root - INFO - (1279, 12)
[ 2024-06-22 08:47:01,320 ] 22 root - INFO - (320, 12)
[ 2024-06-22 08:47:01,321 ] 23 root - INFO -       fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
831             5.9              0.61         0.08             2.1      0.071   
691             9.2              0.92         0.24             2.6      0.087   
222             6.8              0.61         0.04             1.5    