In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import os
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import numpy as np
import cv2
import mediapipe as mp

In [2]:
mp_hands = mp.solutions.hands.Hands(
    max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5
)

In [3]:
def crop_hand_from_image(image, hand_landmarks):
    expansion_factor = 0.2  # You can adjust this value as needed

    min_x = min([landmark.x for landmark in hand_landmarks]) * image.shape[1]
    max_x = max([landmark.x for landmark in hand_landmarks]) * image.shape[1]
    min_y = min([landmark.y for landmark in hand_landmarks]) * image.shape[0]
    max_y = max([landmark.y for landmark in hand_landmarks]) * image.shape[0]

    width = max_x - min_x
    height = max_y - min_y

    min_x = max(0, min_x - expansion_factor * width)
    max_x = min(image.shape[1], max_x + expansion_factor * width)
    min_y = max(0, min_y - expansion_factor * height)
    max_y = min(image.shape[0], max_y + expansion_factor * height)

    cropped_image = image[int(min_y):int(max_y), int(min_x):int(max_x)]
    

    # Data Augmentation
    # 1. Random rotation
    angle = np.random.uniform(-10, 10)  # Random rotation between -10 and 10 degrees
    M = cv2.getRotationMatrix2D((cropped_image.shape[1] / 2, cropped_image.shape[0] / 2), angle, 1)
    rotated_image = cv2.warpAffine(cropped_image, M, (cropped_image.shape[1], cropped_image.shape[0]))
    
    # 2. Random brightness and contrast adjustment
    alpha = 1.0 + np.random.uniform(-0.1, 0.1)  # Random contrast adjustment between 0.9 and 1.1
    beta = np.random.randint(-30, 30)  # Random brightness adjustment between -30 and 30
    augmented_image = cv2.convertScaleAbs(rotated_image, alpha=alpha, beta=beta)
    
    # 3. Random horizontal flip
    if np.random.rand() > 0.5:
        augmented_image = cv2.flip(augmented_image, 1)

    return augmented_image


In [4]:
def process_image_with_mediapipe(image_path):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = mp_hands.process(image_rgb)
    if results.multi_hand_landmarks:
        hand_landmarks = results.multi_hand_landmarks[0].landmark
        cropped_image = crop_hand_from_image(image, hand_landmarks)


        return cropped_image
    return None

In [5]:
data_dir = 'C:/Sign Language Detection/Module 1 (sign to text)/dataset/'
output_dir = 'C:/Sign Language Detection/Module 1 (sign to text)/resized_dataset/'

os.makedirs(output_dir, exist_ok=True)

In [6]:
for phase in ['Train_Alphabet', 'Test_Alphabet']:
    print(f"Processing {phase}...")
    output_phase_dir = os.path.join(output_dir, phase)
    os.makedirs(output_phase_dir, exist_ok=True)

    for root, _, filenames in os.walk(os.path.join(data_dir, phase)):
        for filename in filenames:
            if filename.endswith(('.jpg', '.jpeg', '.png')):
                image_path = os.path.join(root, filename)
                processed_image = process_image_with_mediapipe(image_path)
                if processed_image is not None:
                    output_path = os.path.join(output_phase_dir, os.path.relpath(image_path, data_dir))
                    os.makedirs(os.path.dirname(output_path), exist_ok=True)
                    cv2.imwrite(output_path, processed_image)

print("Hand images dataset created successfully.")

Processing Train_Alphabet...
Processing Test_Alphabet...
Hand images dataset created successfully.
