**Data Pre-Processing**

In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from glob import glob
import pandas as pd

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
train_images_path = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/images"
train_labels_path = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/labels"

In [None]:
#normalizing the image dimensions:RESIZE And NORMALIZE

def resize_and_normalize_images(image_folder, target_size=(640, 640)):
    resized_folder = f"{image_folder}_resized"
    os.makedirs(resized_folder, exist_ok=True)

    for img_path in tqdm(glob(os.path.join(image_folder, "*.jpg"))):

        img = cv2.imread(img_path)

        resized_img = cv2.resize(img, target_size)

        normalized_img = resized_img / 255.0


        new_img_path = os.path.join(resized_folder, os.path.basename(img_path))
        cv2.imwrite(new_img_path, (normalized_img * 255).astype(np.uint8))

    return resized_folder

In [None]:
#Data Augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def augment_images(image_folder, save_to_folder, augment_count=3):
    os.makedirs(save_to_folder, exist_ok=True)
    #Data Generator
    datagen = ImageDataGenerator(
        rotation_range=30,  #Rotation Range
        width_shift_range=0.2,  #Width Shift
        height_shift_range=0.2,  #Height Shift
        shear_range=0.2,  #Shear Range
        zoom_range=0.2,  #Zoom Range
        horizontal_flip=True,  #Horizontal
        fill_mode='nearest'  #Fill Mode
    )

    for img_path in tqdm(glob(os.path.join(image_folder, "*.jpg"))):
        img = cv2.imread(img_path)
        # make the appropriate shape
        img = img.reshape((1,) + img.shape)
        i = 0
        # Creating a certain number of augmented versions for each image
        for batch in datagen.flow(img, batch_size=1, save_to_dir=save_to_folder, save_prefix='aug', save_format='jpg'):
            i += 1
            if i >= augment_count:
                break

In [None]:
#Applying Resize and Normalize Function
resized_images_folder = resize_and_normalize_images(train_images_path)

100%|██████████| 20043/20043 [15:12<00:00, 21.97it/s]


In [None]:
#Applying Augmentation Function
augment_save_folder = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/augmented_images"
augment_images(resized_images_folder, augment_save_folder)

100%|██████████| 20043/20043 [1:47:52<00:00,  3.10it/s]


In [5]:
def denoise_images(image_folder):
    denoised_folder = f"{image_folder}_denoised"
    os.makedirs(denoised_folder, exist_ok=True)

    for img_path in tqdm(glob(os.path.join(image_folder, "*.jpg"))):
        img = cv2.imread(img_path)
        denoised_img = cv2.medianBlur(img, 3)  # Median Blur kullanarak gürültü giderme
        new_img_path = os.path.join(denoised_folder, os.path.basename(img_path))
        cv2.imwrite(new_img_path, denoised_img)

    return denoised_folder

In [18]:
# Combined Preprocessing Function (Denoise, Histogram Equalization, )
def combined_preprocessing(image_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for img_path in tqdm(glob(os.path.join(image_folder, "*.jpg"))):
        # Read Image
        img = cv2.imread(img_path)

        # Step 1: Denoise Image
        denoised_img = cv2.medianBlur(img, 3)

        # Step 2: Histogram Equalization (only for grayscale images)
        if len(denoised_img.shape) == 2:  # Grayscale
            equalized_img = cv2.equalizeHist(denoised_img)
        else:  # Color Image
            img_y_cr_cb = cv2.cvtColor(denoised_img, cv2.COLOR_BGR2YCrCb)
            y, cr, cb = cv2.split(img_y_cr_cb)
            y_eq = cv2.equalizeHist(y)
            equalized_img = cv2.merge((y_eq, cr, cb))
            equalized_img = cv2.cvtColor(equalized_img, cv2.COLOR_YCrCb2BGR)

        # Save Preprocessed Image
        new_img_path = os.path.join(output_folder, os.path.basename(img_path))
        cv2.imwrite(new_img_path, equalized_img)


In [6]:
augment_save_folder = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/augmented_images"
final_output_path = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/final_processed_images"

In [26]:
# Apply Combined Preprocessing on Augmented Images
combined_preprocessing(augment_save_folder, final_output_path)

100%|██████████| 9968/9968 [08:58<00:00, 18.52it/s]


In [7]:
processed_labels_path = "/content/drive/MyDrive/SignLanguage/TurkishSignLanguageYoloV8/train/processed_labels"

In [8]:
# Label Preprocessing Function
def preprocess_labels(label_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for label_path in tqdm(glob(os.path.join(label_folder, "*.txt"))):
        with open(label_path, 'r') as file:
            lines = file.readlines()

        processed_lines = []
        for line in lines:
            # Split the bounding box information
            parts = line.strip().split()
            if len(parts) == 9:
                class_id, x_center, y_center, width, height, x_min, y_min, x_max, y_max = parts
                # Convert to float for potential rescaling (if needed)
                x_center = float(x_center)
                y_center = float(y_center)
                width = float(width)
                height = float(height)
                x_min = float(x_min)
                y_min = float(y_min)
                x_max = float(x_max)
                y_max = float(y_max)

                # Here you could add any preprocessing steps for labels, such as scaling or adjustments
                # For now, we will keep them the same

                processed_lines.append(f"{class_id} {x_center} {y_center} {width} {height} {x_min} {y_min} {x_max} {y_max}\n")

        # Save processed label
        new_label_path = os.path.join(output_folder, os.path.basename(label_path))
        with open(new_label_path, 'w') as file:
            file.writelines(processed_lines)

In [9]:
# Apply Label Preprocessing on Original Labels
preprocess_labels(train_labels_path, processed_labels_path)

100%|██████████| 20040/20040 [1:41:06<00:00,  3.30it/s]
