In [1]:
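# NOTE: this cell is disabled (every line is commented out); the output shown
# below it was produced by an earlier run, before the code was commented out.
# import os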
# import pandas as pd

# def create_labels_csv(dataset_path, output_csv):
#     # List to store image file paths and their corresponding labels
#     data = []

#     # Traverse the dataset directory
#     for root, dirs, files in os.walk(dataset_path):
#         for file in files:
#             if file.endswith(('.jpg', '.jpeg', '.png')):
#                 # Get the class label from the subdirectory name
#                 label = os.path.basename(root)
#                 # Get the full file path
#                 file_path = os.path.join(root, file)
#                 # Append to the data list
#                 data.append([file_path, label])
    
#     # Create a DataFrame from the data list
#     df = pd.DataFrame(data, columns=['file_path', 'label'])
    
#     # Save the DataFrame to a CSV file
#     df.to_csv(output_csv, index=False)
#     print(f'Labels CSV file created at: {output_csv}')

# # Usage
# dataset_path = "../data/data_original/javaneseScript_byPhiard/train"
# output_csv = "../data/data_original/javaneseScript_byPhiard/train/labels.csv"
# create_labels_csv(dataset_path, output_csv)

Labels CSV file created at: ../data/data_original/javaneseScript_byPhiard/train/labels.csv


In [12]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img, save_img

def augment_images(dataset_path, output_path, output_csv, augment_count=550,
                   max_source_images=1, seed=None):
    """Write randomly augmented copies of images from a class-per-folder dataset.

    Every sub-directory of ``dataset_path`` is treated as one class. For each
    selected source image, ``augment_count`` augmented variants (random
    rotation, shear and zoom) are saved as PNG files under
    ``output_path/<class_dir>/`` with the name ``<source stem>_aug_<i>.png``.
    A CSV with the columns ``file_path`` and ``label`` listing every written
    file is saved to ``output_csv``.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        output_path: Directory under which the per-class output folders are
            created.
        output_csv: Path of the labels CSV to write. Its parent directory is
            created if it does not exist.
        augment_count: Number of augmented images to generate per source
            image. Values <= 0 generate nothing.
        max_source_images: Maximum number of source images used per class,
            taken in sorted file-name order. The default of 1 keeps the
            long-standing behaviour of augmenting only the first image of
            each class; pass ``None`` to augment every image.
        seed: Optional seed forwarded to ``ImageDataGenerator.flow`` so the
            random transformations can be reproduced.

    Returns:
        None. Results are written to disk and a confirmation line is printed.
    """
    # Create an ImageDataGenerator object with augmentation parameters
    datagen = ImageDataGenerator(
        rotation_range=15,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        shear_range=0.1,
        zoom_range=0.1,
        # horizontal_flip=True,
        fill_mode='nearest'
    )

    image_extensions = ('.jpg', '.jpeg', '.png')
    labels = []

    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # CSV row order and the choice of source images deterministic.
    for class_dir in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_dir)
        if not os.path.isdir(class_path):
            continue

        # Compare extensions case-insensitively so files such as "IMG.JPG"
        # are not silently skipped.
        image_files = [
            name for name in sorted(os.listdir(class_path))
            if name.lower().endswith(image_extensions)
        ]
        if max_source_images is not None:
            image_files = image_files[:max(0, max_source_images)]

        # Nothing to write for this class: do not create an empty folder.
        if not image_files or augment_count <= 0:
            continue

        # Create the class output folder once instead of once per image.
        class_output_dir = os.path.join(output_path, class_dir)
        os.makedirs(class_output_dir, exist_ok=True)

        for img_file in image_files:
            img = load_img(os.path.join(class_path, img_file))
            x = img_to_array(img)
            # flow() expects a batch, so add a leading batch dimension.
            x = x.reshape((1,) + x.shape)

            stem = os.path.splitext(img_file)[0]
            # flow() never stops on its own; pull exactly augment_count batches.
            batches = datagen.flow(x, batch_size=1, seed=seed)
            for i in range(augment_count):
                batch = next(batches)
                augmented_img_path = os.path.join(
                    class_output_dir, f'{stem}_aug_{i}.png'
                )
                # save_img() converts the array to an image itself, so the
                # array is passed directly rather than a pre-converted image.
                save_img(augmented_img_path, batch[0])
                labels.append([augmented_img_path, class_dir])

    # The CSV's folder may not exist yet (e.g. when no image was written).
    csv_dir = os.path.dirname(output_csv)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    # Save labels to CSV
    df = pd.DataFrame(labels, columns=['file_path', 'label'])
    df.to_csv(output_csv, index=False)
    print(f'Augmented images and labels CSV file created at: {output_csv}')

# Usage: read the class folders under the training directory and write the
# augmented images plus their labels CSV into the v0.9 preprocessing folder.
dataset_path = '../data/data_original/javaneseScript_byPhiard/train'
output_path = '../data/data_preprocessing/v0.9/'
output_csv = '../data/data_preprocessing/v0.9/augmented_labels.csv'
augment_images(
    dataset_path=dataset_path,
    output_path=output_path,
    output_csv=output_csv,
)

Augmented images and labels CSV file created at: ../data/data_preprocessing/v0.9/augmented_labels.csv


In [1]:
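# NOTE: this cell is disabled (every line is commented out). It is a variant of
# augment_images that converts each image to grayscale and applies a Gaussian
# blur followed by a Laplacian filter before augmenting.
# import os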
# import pandas as pd
# import numpy as np
# import cv2
# from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, save_img

# def augment_images(dataset_path, output_path, output_csv, augment_count=5):
#     # Create an ImageDataGenerator object with augmentation parameters
#     datagen = ImageDataGenerator(
#         rotation_range=15,
#         shear_range=0.1,
#         zoom_range=0.1,
#         fill_mode='nearest'
#     )
    
#     labels = []

#     for class_dir in os.listdir(dataset_path):
#         class_path = os.path.join(dataset_path, class_dir)
#         if not os.path.isdir(class_path):
#             continue
        
#         for img_file in os.listdir(class_path):
#             img_path = os.path.join(class_path, img_file)
#             if not img_file.endswith(('.jpg', '.jpeg', '.png')):
#                 continue
            
#             # Load the image
#             img = cv2.imread(img_path)
            
#             # Convert to grayscale
#             gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            
#             # Apply Laplacian of Gaussian (LoG)
#             log_img = cv2.GaussianBlur(gray_img, (3, 3), 0)
#             log_img = cv2.Laplacian(log_img, cv2.CV_64F)
#             log_img = np.uint8(np.absolute(log_img))
            
#             # Add a dummy channel dimension to make it rank 3
#             log_img = np.expand_dims(log_img, axis=-1)
            
#             # Reshape to add batch dimension
#             x = np.expand_dims(log_img, axis=0)
            
#             i = 0
#             for batch in datagen.flow(x, batch_size=1):
#                 augmented_img = array_to_img(batch[0])
#                 augmented_img_name = f'{os.path.splitext(img_file)[0]}_aug_{i}.png'
#                 augmented_img_path = os.path.join(output_path, class_dir)
#                 if not os.path.exists(augmented_img_path):
#                     os.makedirs(augmented_img_path)
                
#                 save_img(os.path.join(augmented_img_path, augmented_img_name), augmented_img)
#                 labels.append([os.path.join(augmented_img_path, augmented_img_name), class_dir])
#                 i += 1
#                 if i >= augment_count:
#                     break
    
#     # Save labels to CSV
#     df = pd.DataFrame(labels, columns=['file_path', 'label'])
#     df.to_csv(output_csv, index=False)
#     print(f'Augmented images and labels CSV file created at: {output_csv}')

# # Usage
# dataset_path = '../data/data_original/javaneseScript_byPhiard/train'
# output_path = '../data/data_preprocessing/v0.9/'
# output_csv = '../data/data_preprocessing/v0.9/augmented_labels.csv'
# augment_images(dataset_path, output_path, output_csv)
