In [1]:
import os 

In [2]:
%pwd


'c:\\Users\\PC\\Desktop\\Maintenance-cost-reduction\\research'

In [3]:
# Step from the notebook's folder up to its parent directory (the recorded
# %pwd outputs show the move from ...\research to the project root),
# presumably so the `src.*` imports and relative config paths resolve.
# NOTE(review): this is not idempotent - re-running the cell climbs one more
# level each time, so restart the kernel before executing it again.
os.chdir(os.pardir)

In [4]:
%pwd

'c:\\Users\\PC\\Desktop\\Maintenance-cost-reduction'

In [5]:
from dataclasses import dataclass
from src.constant import *
from pathlib import Path
from src.exception.exception import CustomException
from src.utils.common import load_yaml,create_directories

In [6]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of the filesystem locations used by the data-transformation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Raw input file; it is read with ``pd.read_csv`` by the transformation stage.
    data_path: Path
    # Location under which the training split is written.
    train_path: Path
    # Location under which the test split is written.
    test_path: Path
    # Location under which the cleaned (pre-split) data is written.
    cleaned_data: Path

In [7]:
class ConfrigurationManager:
    """Loads the project's YAML settings and hands out typed stage configurations.

    The three YAML documents (config, params, schema) are read once in the
    constructor and kept on the instance as ``config``, ``params`` and
    ``schema``.
    """

    def __init__(self,
                 config_filepath=CONFIG_PATH,
                 params_filepath=PARAMS_PATH,
                 schema_filepath=SCHEMA_PATH):
        """Read the YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = load_yaml(config_filepath)
        self.params = load_yaml(params_filepath)
        self.schema = load_yaml(schema_filepath)
        create_directories([Path(self.config['artifacts_root'])])

    def get_data_tranformation_initiate(self) -> DataTransformationConfig:
        """Build the configuration object for the data-transformation stage.

        Returns:
            DataTransformationConfig populated from the ``data_transformation``
            section of the loaded config.
        """
        # The loaded config is read both by key and by attribute in this class,
        # so it is presumably a ConfigBox-like mapping - verify in load_yaml.
        section = self.config.data_transformation

        # The dataclass declares every field as Path; convert the raw YAML
        # values here so the declared types are actually honoured (the
        # artifacts root above is already treated the same way).
        return DataTransformationConfig(
            data_path=Path(section.data_path),
            train_path=Path(section.train_path),
            test_path=Path(section.test_path),
            cleaned_data=Path(section.cleaned_data),
        )
          

In [8]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import os 
import sys 
import pandas as pd 
import numpy as np 

In [17]:
class Data_Transformation:
    """Cleaning / feature-engineering stage that turns the raw CSV into train and test files.

    The public methods form a chain, each one calling the previous step:
    ``convert_date`` -> ``convert_skewness`` -> ``convert_model_name`` ->
    ``clean_data`` -> ``train_test_split``.
    """

    # Columns that receive a log1p transform in ``convert_skewness``.
    _LOG_COLUMNS = ("metric2", "metric3", "metric4", "metric7", "metric9")
    # Columns removed from the frame in ``convert_skewness``.
    _DROPPED_METRICS = ("metric8",)
    # Columns removed from both splits before they are written.
    _NON_FEATURE_COLUMNS = ("date", "device")

    # The annotation is quoted so that defining this class does not require the
    # config dataclass to be resolvable at definition time.
    def __init__(self, config: "DataTransformationConfig"):
        """Store the stage configuration (paths are read from it later)."""
        self.config = config

    def _write_csv(self, frame, directory, file_name):
        """Write ``frame`` (without index) to ``directory/file_name``, creating the folder first."""
        target = os.path.join(directory, file_name)
        parent = os.path.dirname(target)
        if parent:  # os.makedirs('') raises, so skip it for bare file names
            os.makedirs(parent, exist_ok=True)
        frame.to_csv(target, index=False)
        return target

    def convert_date(self, df):
        """Parse the ``date`` column and derive ``months`` and ``day`` columns.

        Args:
            df: DataFrame with a ``date`` column parseable by ``pd.to_datetime``.

        Returns:
            A new DataFrame; the frame passed in is left untouched.

        Raises:
            CustomException: wrapping any error raised while converting.
        """
        try:
            # Work on a copy so the caller's frame is not silently modified.
            frame = df.copy()
            # NOTE(review): the recorded run shows a warning pointing at this
            # call; consider passing an explicit ``format=`` once the date
            # layout of the source file is confirmed.
            frame['date'] = pd.to_datetime(frame['date'])
            # Vectorised accessors replace the per-row lambdas. The previous
            # code also built a 'Year' column and dropped it straight away;
            # that round-trip is omitted.
            frame['months'] = frame['date'].dt.month
            frame['day'] = frame['date'].dt.day
            return frame
        except Exception as e:
            raise CustomException(e, sys) from e

    def convert_skewness(self, data):
        """Apply ``log1p`` to the metric columns listed in ``_LOG_COLUMNS`` and drop ``metric8``.

        Args:
            data: Raw DataFrame; it is first passed through ``convert_date``.

        Returns:
            The transformed DataFrame.

        Raises:
            CustomException: wrapping any error raised while converting.
        """
        try:
            frame = self.convert_date(data)
            # metric8 is discarded, so it is dropped before the transform
            # rather than transformed and then thrown away.
            frame = frame.drop(columns=list(self._DROPPED_METRICS))
            for column in self._LOG_COLUMNS:
                frame[column] = np.log1p(frame[column])
            return frame
        except Exception as e:
            # Pass ``sys`` like every other call site in this class.
            raise CustomException(e, sys) from e

    def convert_model_name(self, data):
        """Add a label-encoded ``model_name`` column built from the first four characters of ``device``.

        Args:
            data: Raw DataFrame; it is first passed through ``convert_skewness``.

        Returns:
            The DataFrame with the additional integer ``model_name`` column.

        Raises:
            CustomException: wrapping any error raised while converting.
        """
        try:
            frame = self.convert_skewness(data)
            prefixes = frame['device'].str[:4]
            # The encoder is fitted on whatever frame is passed in and is not
            # kept, so the mapping is only valid for this call.
            frame['model_name'] = LabelEncoder().fit_transform(prefixes)
            return frame
        except Exception as e:
            raise CustomException(e, sys) from e

    def clean_data(self, data):
        """Run the full cleaning chain and persist the result.

        The output is written to ``<config.cleaned_data>/cleaned_data`` (the
        file name has no extension; it is kept as-is so existing readers of
        that path keep working).

        Args:
            data: Raw DataFrame.

        Returns:
            The cleaned DataFrame that was written.

        Raises:
            CustomException: wrapping any error raised while cleaning or writing.
        """
        try:
            cleaned = self.convert_model_name(data)
            self._write_csv(cleaned, self.config.cleaned_data, 'cleaned_data')
            return cleaned
        except Exception as e:
            raise CustomException(e, sys) from e

    def train_test_split(self, test_size=0.22, random_state=42):
        """Read the raw CSV, clean it, split it and write both splits.

        Inside this method the bare name ``train_test_split`` resolves to the
        module-level scikit-learn function, not to this method.

        Args:
            test_size: Share of rows placed in the test split (default 0.22).
            random_state: Seed passed to the splitter (default 42).

        Returns:
            Tuple ``(train_data, test_data)`` with ``date`` and ``device`` removed.

        Raises:
            CustomException: wrapping any error raised while reading,
                transforming, splitting or writing.
        """
        try:
            raw = pd.read_csv(self.config.data_path)
            cleaned = self.clean_data(raw)
            train_data, test_data = train_test_split(
                cleaned, test_size=test_size, random_state=random_state
            )
            # Non-inplace drops: the split outputs are derived from ``cleaned``
            # and in-place edits on them can trigger SettingWithCopyWarning.
            non_features = list(self._NON_FEATURE_COLUMNS)
            train_data = train_data.drop(columns=non_features)
            test_data = test_data.drop(columns=non_features)
            self._write_csv(train_data, self.config.train_path, 'train_data')
            self._write_csv(test_data, self.config.test_path, 'test_data')
            return train_data, test_data
        except Exception as e:
            raise CustomException(e, sys) from e
               

In [18]:
# Drive the whole stage: load settings, build the stage config, run the split.
# The variable names mention "ingestion", but the objects created here are the
# data-transformation config and transformer.
try:
    config = ConfrigurationManager()
    data_ingestion_config = config.get_data_tranformation_initiate()
    data_ingestion = Data_Transformation(data_ingestion_config)
    # `data` receives the (train, test) pair returned by the split method.
    data = data_ingestion.train_test_split()
except Exception:
    # Nothing is handled here; the error is passed on unchanged.
    raise

[2024-03-08 15:56:30,415: INFO: common: start the loading  the config\config.yaml file ]
[2024-03-08 15:56:30,420: INFO: common: Load the config\config.yaml successfully]
[2024-03-08 15:56:30,420: INFO: common: start the loading  the params.yaml file ]
[2024-03-08 15:56:30,424: INFO: common: Load the params.yaml successfully]
[2024-03-08 15:56:30,425: INFO: common: start the loading  the schema.yaml file ]
[2024-03-08 15:56:30,429: INFO: common: Load the schema.yaml successfully]
[2024-03-08 15:56:30,429: INFO: common: Enter into the create directores in method]
[2024-03-08 15:56:30,430: INFO: common: artifacts is create >>>>]


  df['date']=pd.to_datetime(df['date'])
