In [3]:
import os 
# IPython magic: report the current working directory before moving.
%pwd
# Step up one directory level so later relative paths resolve from the parent
# folder. NOTE: this is relative to the current directory, so re-running this
# cell moves up one more level each time.
os.chdir("../")
# Report the working directory again to confirm where the kernel now sits.
%pwd

'd:\\My Workspace\\proj_file\\Car-Price-Prediction'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DatatransformationConfig:
    """Read-only settings consumed by the data-transformation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Folder that receives the stage's output files.
    root_dir: Path
    # Location of the input CSV that the stage loads.
    data_path: Path

In [5]:
from car_price_pred.constants import *
from car_price_pred.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=PARAMS_FILE_PATH,
    ):
        """Read the config, params and schema YAML files via ``read_yaml``.

        After loading, the ``artifacts_root`` entry of the main config is
        passed to ``create_directories``.

        NOTE(review): ``schema_filepath`` defaults to ``PARAMS_FILE_PATH``, so
        unless a caller overrides it, ``self.params`` and ``self.schema`` are
        read from the same file. This looks like a copy-paste slip; confirm
        whether the constants module exposes a dedicated schema path that
        should be used here instead.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DatatransformationConfig:
        """Build the config object for the data-transformation stage.

        Reads the ``data_transformation`` section of the loaded config, passes
        its ``root_dir`` to ``create_directories``, and copies ``root_dir`` and
        ``data_path`` into a ``DatatransformationConfig``.

        Returns:
            DatatransformationConfig: settings for the transformation stage.
        """
        stage_section = self.config.data_transformation

        create_directories([stage_section.root_dir])

        return DatatransformationConfig(
            root_dir=stage_section.root_dir,
            data_path=stage_section.data_path,
        )

In [7]:
import os
from car_price_pred import logger
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd


In [13]:
class DataTransformation:
    """Cleans the input CSV, label-encodes text columns and writes a train/test split."""

    def __init__(self, config: DatatransformationConfig):
        """Store the stage configuration used by ``preprocessing``."""
        self.config = config

    def preprocessing(self):
        """Run the full transformation and persist the resulting split.

        Steps, in order:
          1. Load the CSV found at ``config.data_path``.
          2. Drop rows with missing values, then drop duplicate rows.
          3. Label-encode ``Fuel_Type``, ``Seller_Type``, ``Transmission`` and
             ``Car_Name`` in place of their original values.
          4. Split into train/test with ``test_size=0.2`` and ``random_state=42``.
          5. Write ``train.csv`` and ``test.csv`` (without the index) into
             ``config.root_dir``.
          6. Log, then print, the shape and head of both splits.

        NOTE(review): the encoder is a local object re-fitted for every column
        and is never saved, so the category-to-integer mappings are not
        available after this method returns.
        """
        frame = pd.read_csv(self.config.data_path)

        # Incomplete rows go first, duplicates second.
        frame = frame.dropna()
        frame.drop_duplicates(inplace=True)

        # A single encoder instance is re-fitted on each column in turn; every
        # fit_transform call replaces whatever the previous fit learned.
        label_encoder = LabelEncoder()
        for column in ('Fuel_Type', 'Seller_Type', 'Transmission', 'Car_Name'):
            frame[column] = label_encoder.fit_transform(frame[column])

        train, test = train_test_split(frame, test_size=0.2, random_state=42)

        output_dir = self.config.root_dir
        train.to_csv(os.path.join(output_dir, "train.csv"), index=False)
        test.to_csv(os.path.join(output_dir, "test.csv"), index=False)

        # The same four summaries go to the logger first and to stdout second.
        summaries = (train.shape, test.shape, train.head(), test.head())

        logger.info("Splited data into training and test sets")
        for summary in summaries:
            logger.info(summary)

        for summary in summaries:
            print(summary)
        
        

In [14]:
# Run the data-transformation stage end to end: load the configuration, build
# the stage config, then clean/encode/split the data and write the CSV outputs.
#
# The former `try: ... except Exception as e: raise e` wrapper was removed: it
# handled nothing and only re-raised, which adds a redundant entry to the
# traceback. Any failure still propagates out of this cell unchanged.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.preprocessing()

[2024-06-17 00:29:19,367: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-17 00:29:19,370: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-06-17 00:29:19,373: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-06-17 00:29:19,374: INFO: common: created directory at: artifacts]
[2024-06-17 00:29:19,375: INFO: common: created directory at: artifacts/data_transformation]
[2024-06-17 00:29:19,573: INFO: 2886460921: Splited data into training and test sets]
[2024-06-17 00:29:19,574: INFO: 2886460921: (239, 9)]
[2024-06-17 00:29:19,574: INFO: 2886460921: (60, 9)]
[2024-06-17 00:29:19,576: INFO: 2886460921:      Car_Name  Year  Selling_Price  Present_Price  Kms_Driven  Fuel_Type  \
6          68  2015           6.75           8.12       18796          2   
185        50  2008           0.25           0.58        1900          2   
187        36  2013           0.25           0.51       32000          2   
148        15  2010           0.52