In [3]:
import os
import cv2
import numpy as np
import random
import shutil
from tqdm import tqdm

class PreciseImageAugmenter:
    def __init__(self, source_dir, target_count=900, output_dir=None):
        """
        Inisialisasi augmentasi gambar presisi
        
        Args:
            source_dir (str): Direktori sumber gambar
            target_count (int): Target total gambar per kelas
            output_dir (str, optional): Direktori output
        """
        self.source_dir = source_dir
        self.target_count = target_count
        self.output_dir = output_dir or os.path.join(source_dir, 'augmented')
        
        # Pastikan direktori output ada
        os.makedirs(self.output_dir, exist_ok=True)
        
        # Ekstensi gambar yang didukung
        self.image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
        
         # Daftar fungsi augmentasi  
        self.augmentation_techniques = [   
            self._rotate_90,  
            self._flip_horizontal,  
            self._flip_vertical,  
            self._brightness_reduce,  
            self._brightness_increase,  
            self._zoom,  
            self._add_noise,  
            self._adjust_exposure,  
            self._boost_saturation 
        ]  
    
    def _load_images(self, directory):
        """Memuat semua gambar dari direktori"""
        images = []
        for root, _, files in os.walk(directory):
            for file in files:
                if os.path.splitext(file)[1].lower() in self.image_extensions:
                    images.append(os.path.join(root, file))
        return images
    
    def _rotate_20(self, image):
        """Rotasi gambar 20 derajat"""
        rows, cols, _ = image.shape
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 20, 1)
        return cv2.warpAffine(image, M, (cols, rows))
    
    def _rotate_45(self, image):
        """Rotasi gambar 45 derajat"""
        rows, cols, _ = image.shape
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 45, 1)
        return cv2.warpAffine(image, M, (cols, rows))
    
    def _rotate_90(self, image):
        """Rotasi gambar 90 derajat"""
        return np.rot90(image)
    
    def _flip_horizontal(self, image):
        """Flip horizontal"""
        return cv2.flip(image, 1)
    
    def _flip_vertical(self, image):
        """Flip vertikal"""
        return cv2.flip(image, 0)
    
    def _brightness_reduce(self, image):
        """Kurangi kecerahan"""
        return cv2.convertScaleAbs(image, beta=50)
    
    def _brightness_increase(self, image):
        """Tambahkan kecerahan"""
        return cv2.convertScaleAbs(image, beta=-50)
    
    def _zoom(self, image):
        """Perbesar gambar"""
        rows, cols, _ = image.shape
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, 2)
        return cv2.warpAffine(image, M, (cols, rows))
    
    def _shear(self, image):
        """Shear gambar"""
        rows, cols, _ = image.shape
        M = np.array([[1, 0.3, 0], [0.3, 1, 0]])
        return cv2.warpAffine(image, M, (cols, rows))
    
    def _add_noise(self, image):
        """Tambahkan noise"""
        noise = np.random.normal(0, 20, image.shape).astype(np.uint8)
        return cv2.add(image, noise)
    
    def _adjust_exposure(self, image):
        """Atur exposure"""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        v = cv2.add(v, 50)
        v = np.clip(v, 0, 255)
        final_hsv = cv2.merge((h, s, v))
        return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    
    def _boost_saturation(self, image):
        """Tingkatkan saturasi"""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        s = cv2.add(s, 50)
        s = np.clip(s, 0, 255)
        final_hsv = cv2.merge((h, s, v))
        return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    
    def augment_image(self, image_path):
        """
        Hasilkan augmentasi dari satu gambar
        
        Returns:
            list: Gambar-gambar hasil augmentasi
        """
        image = cv2.imread(image_path)
        
        # Pilih acak teknik augmentasi
        augmented_images = []
        for technique in random.sample(self.augmentation_techniques, 3):
            augmented_images.append(technique(image))
        
        return augmented_images
    
    def process_subdirectory(self, source_subdir, output_subdir):
        """
        Proses augmentasi untuk satu subdirektori
        
        Returns:
            dict: Informasi proses augmentasi
        """
        # Buat direktori output jika belum ada
        os.makedirs(output_subdir, exist_ok=True)
        
        # Muat gambar sumber
        source_images = self._load_images(source_subdir)
        current_count = len(source_images)
        
        # Hitung kebutuhan augmentasi
        augmentation_needed = self.target_count - current_count
        
        # Informasi proses
        process_info = {
            'source_images': current_count,
            'augmentation_needed': augmentation_needed,
            'augmented_images': 0
        }
        
        # Jika tidak perlu augmentasi
        if augmentation_needed <= 0:
            print(f"Direktori {source_subdir}: Sudah memenuhi target")
            return process_info
        
        # Proses augmentasi
        augmented_count = 0
        while augmented_count < augmentation_needed:
            # Pilih gambar sumber secara acak
            source_image_path = random.choice(source_images)
            
            # Hasilkan augmentasi
            augmented_images = self.augment_image(source_image_path)
            
            # Simpan gambar augmentasi
            for aug_image in augmented_images:
                if augmented_count < augmentation_needed:
                    # Nama file unik
                    base_name = os.path.splitext(os.path.basename(source_image_path))[0]
                    output_filename = f"aug_{base_name}_{augmented_count}.jpg"
                    output_path = os.path.join(output_subdir, output_filename)
                    
                    # Simpan gambar
                    cv2.imwrite(output_path, aug_image)
                    augmented_count += 1
                else:
                    break
        
        # Update informasi
        process_info['augmented_images'] = augmented_count
        
        return process_info
    
    def augment_dataset(self):
        """
        Augmentasi seluruh dataset
        """
        print("🚀 Memulai Augmentasi Dataset")
        print(f"Target per Kelas: {self.target_count} gambar")
        
        # Statistik global
        global_stats = {
            'total_subdirectories': 0,
            'processed_subdirectories': 0,
            'total_source_images': 0,
            'total_augmented_images': 0
        }
        
        # Proses setiap subdirektori
        for root, dirs, files in os.walk(self.source_dir):
            # Lewati direktori output
            if root == self.output_dir:
                continue
            
            # Cek apakah direktori mengandung gambar
            images = [f for f in files if os.path.splitext(f)[1].lower() in self.image_extensions]
            
            if images:
                # Path relatif untuk output
                relative_path = os.path.relpath(root, self.source_dir)
                output_subdir = os.path.join(self.output_dir, relative_path)
                
                # Tampilkan informasi
                print(f"\n📁 Memproses: {root}")
                
                # Proses subdirektori
                subdir_stats = self.process_subdirectory(root, output_subdir)
                
                # Update statistik global
                global_stats['total_subdirectories'] += 1
                global_stats['processed_subdirectories'] += 1
                global_stats['total_source_images'] += subdir_stats['source_images']
                global_stats['total_augmented_images'] += subdir_stats['augmented_images']
                
                print(f"Gambar sumber: {subdir_stats['source_images']}")
                print(f"Gambar augmentasi: {subdir_stats['augmented_images']}")
        
        # Tampilkan statistik akhir
        print("\n📊 Statistik Augmentasi Akhir")
        print(f"Total Subdirektori: {global_stats['total_subdirectories']}")
        print(f"Total Gambar Sumber: {global_stats['total_source_images']}")
        print(f"Total Gambar Augmentasi: {global_stats['total_augmented_images']}")
        print(f"Total Gambar Akhir: {global_stats['total_source_images'] + global_stats['total_augmented_images']}")

# Contoh penggunaan
def main():
    source_directory = '/Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train'
    augmenter = PreciseImageAugmenter(
        source_dir=source_directory, 
        target_count=520  # Tepat 900 gambar per kelas
    )
    augmenter.augment_dataset()

if __name__ == "__main__":
    main()

🚀 Memulai Augmentasi Dataset
Target per Kelas: 520 gambar

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/FrogeyeLeafSpot
Gambar sumber: 83
Gambar augmentasi: 437

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/Healthy
Gambar sumber: 78
Gambar augmentasi: 442

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/Rust
Gambar sumber: 75
Gambar augmentasi: 445

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/SuddenDeathSyndrome
Gambar sumber: 80
Gambar augmentasi: 440

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/BacterialPustule
Gambar sumber: 66
Gambar augmentasi: 454

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/augmented/FrogeyeLeafSpot
Gambar sumber: 437
Gambar augmentasi: 83

📁 Memproses: /Users/nero555/Documents/Dev/CV/kedelai-DS/dataset/kedelay4/train/augmented/Healthy
Gambar sumber: 442
Gambar 