In [10]:
# importing dependencies

import os
import cv2
import logging
import numpy as np
from pathlib import Path
from torchvision import transforms
from pathlib import Path

In [2]:
# Show the current working directory before it is changed in the next cell.
%pwd

'e:\\AI DA Portfolio\\Potato-Disease\\research'

In [3]:
# Step up from the notebook's research/ folder to its parent directory.
# NOTE(review): presumably the project's config paths are relative to that
# parent (the project root) - confirm.
# The guard makes the cell idempotent: an unconditional chdir would climb one
# more level every time the cell is re-run in the same kernel.
if Path.cwd().name == 'research':
    os.chdir('../')

In [4]:
# Confirm the working directory after the chdir in the previous cell.
%pwd

'e:\\AI DA Portfolio\\Potato-Disease'

In [34]:
from dataclasses import dataclass

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable set of filesystem paths consumed by the data-transformation stage."""
    # Root output directory; the stage writes into its 'train', 'val' and 'test' sub-folders.
    transformed_dir: Path
    # Input directory scanned for training images.
    unzip_train_dir: Path
    # Input directory scanned for validation images.
    unzip_val_dir: Path
    # Input directory scanned for test images.
    unzip_test_dir: Path

In [6]:
from PotatoDisease.constants import *
from PotatoDisease.utils.common import read_yaml, create_directories

In [31]:
class ConfigurationManager:
    """Reads the project's YAML settings and hands out per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        config_path = Path(config_filepath)
        self.config = read_yaml(config_path)

        params_path = Path(params_filepath)
        self.params = read_yaml(params_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation config from the ``data_transformation`` section.

        Returns:
            A ``DataTransformationConfig`` whose fields are the section's values
            wrapped in ``Path``.
        """
        section = self.config.data_transformation
        path_fields = (
            "transformed_dir",
            "unzip_train_dir",
            "unzip_val_dir",
            "unzip_test_dir",
        )
        # Field names match the section keys one-to-one, so read each by name.
        resolved = {name: Path(getattr(section, name)) for name in path_fields}
        return DataTransformationConfig(**resolved)

In [38]:
class DataTransformation:
    """Resize and augment every image of the train / val / test splits.

    For each readable image under a split's input directory two files are
    written: ``resized_<name>`` (the image scaled to a fixed size) and
    ``augmented_<name>`` (the resized image after a random flip, a small random
    rotation and colour jitter). Sub-directories of the input are mirrored in
    the output.
    """

    def __init__(self, config: "DataTransformationConfig"):
        """Keep the stage configuration and create a logger.

        Args:
            config: Input split directories and the output root directory.
        """
        self.config = config
        self.logger = logging.getLogger(__name__)

    def resize_image(self, image, size=(256, 256)):
        """Resize the image to the given size.

        Args:
            image: Image array as returned by ``cv2.imread``.
            size: Target size passed to ``cv2.resize`` as ``dsize``, i.e.
                ``(width, height)``.

        Returns:
            The resized image array.
        """
        return cv2.resize(image, size)

    def augment_image(self, image):
        """Apply random augmentation to the image.

        The pipeline is a random horizontal flip, a random rotation of up to
        10 degrees and a colour jitter. The result is random; no seed is set
        here.

        Args:
            image: Image to augment. A 3-channel numpy array is taken to be in
                OpenCV's BGR order (as produced by ``cv2.imread``).

        Returns:
            The augmented image as a numpy array, in the same channel order as
            the input.
        """
        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)
        ])

        # OpenCV stores colour images as BGR while PIL/torchvision assume RGB.
        # Without the conversion the jitter's grayscale-weighted brightness and
        # contrast math runs on swapped red/blue channels.
        is_bgr = (
            isinstance(image, np.ndarray)
            and image.ndim == 3
            and image.shape[2] == 3
        )
        if is_bgr:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        augmented_image = np.array(transform(transforms.ToPILImage()(image)))

        # Convert back so callers (and cv2.imwrite) still receive BGR data.
        if is_bgr:
            augmented_image = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)
        return augmented_image

    @staticmethod
    def _mirror_subdir(input_dir, output_dir, current_dir):
        """Return the folder under ``output_dir`` that mirrors ``current_dir``'s position under ``input_dir``."""
        relative = os.path.relpath(current_dir, input_dir)
        return os.path.normpath(os.path.join(output_dir, relative))

    def _write_image(self, path, image):
        """Write ``image`` to ``path`` and log a warning if OpenCV reports failure."""
        # cv2.imwrite signals most failures by returning False, not by raising.
        if not cv2.imwrite(path, image):
            self.logger.warning(f"Could not write image: {path}")

    def transform_images(self, input_dir: Path, output_dir: Path):
        """Transform images by resizing and augmenting.

        Walks ``input_dir`` recursively. Each readable image yields a
        ``resized_<name>`` and an ``augmented_<name>`` file, written to the
        folder under ``output_dir`` that mirrors the image's sub-folder in
        ``input_dir``. Mirroring keeps any per-folder grouping of the input and
        stops same-named files from different folders overwriting each other.
        Files OpenCV cannot read are skipped with a warning.

        Args:
            input_dir: Directory tree to read images from.
            output_dir: Directory tree to write results to; created if needed.
        """
        os.makedirs(output_dir, exist_ok=True)

        # os.walk yields nothing for a missing directory, so report it here.
        if not os.path.isdir(input_dir):
            self.logger.warning(f"Input directory does not exist: {input_dir}")
            return

        for root, _, files in os.walk(input_dir):
            if not files:
                continue

            target_dir = self._mirror_subdir(input_dir, output_dir, root)
            os.makedirs(target_dir, exist_ok=True)

            for file in files:
                file_path = os.path.join(root, file)
                image = cv2.imread(file_path)
                if image is None:
                    self.logger.warning(f"Could not read image: {file_path}")
                    continue

                resized_image = self.resize_image(image)
                self._write_image(os.path.join(target_dir, f"resized_{file}"), resized_image)

                augmented_image = self.augment_image(resized_image)
                self._write_image(os.path.join(target_dir, f"augmented_{file}"), augmented_image)

    def run(self):
        """Transform the train, validation and test splits in that order.

        Each split is written to ``<transformed_dir>/<split>``.

        Raises:
            Exception: Any error raised while transforming is logged with its
                traceback and then re-raised, so callers do not mistake a
                failed run for a successful one.
        """
        try:
            self.logger.info("Starting data transformation...")

            splits = (
                ('train', self.config.unzip_train_dir),
                ('val', self.config.unzip_val_dir),
                ('test', self.config.unzip_test_dir),
            )
            for split_name, split_input_dir in splits:
                split_output_dir = os.path.join(self.config.transformed_dir, split_name)
                self.transform_images(split_input_dir, split_output_dir)

            self.logger.info("Data transformation completed. Transformed images saved.")
        except Exception as e:
            self.logger.exception(e)
            raise


In [39]:
try:
    # Initialize the configuration manager
    config = ConfigurationManager()

    # Retrieve the data transformation configuration
    data_transformation_config = config.get_data_transformation_config()

    # Create an instance of DataTransformation
    data_transformation = DataTransformation(config=data_transformation_config)

    # Run the transformation process
    data_transformation.run()

    # Verify that transformed datasets are saved
    transformed_images_dir = Path(data_transformation_config.transformed_dir)

    # Only real files count as evidence of success: the split folders are
    # created even when no image was processed, so a plain "directory is not
    # empty" check would pass on an empty run.
    has_transformed_files = transformed_images_dir.is_dir() and any(
        path.is_file() for path in transformed_images_dir.rglob("*")
    )
    if has_transformed_files:
        print(f"Success: Transformed datasets saved in {transformed_images_dir}.")
    else:
        print("Failure: No transformed datasets found.")

except Exception as e:
    print(f"An error occurred: {e}")

[2024-08-12 09:34:44,575: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-12 09:34:44,577: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-12 09:34:44,578: INFO: common: created directory at: artifacts]
[2024-08-12 09:34:44,579: INFO: 1155744968: Starting data transformation...]
[2024-08-12 09:35:13,198: INFO: 1155744968: Data transformation completed. Transformed images saved.]
Success: Transformed datasets saved in artifacts\transformed_datasets.
