# Ethiopian Sign Language - TWO HAND Landmark Model V2
## Improved version with better accuracy!

**Improvements over V1:**
- Uses BOTH hands (126 features = 2 hands x 21 landmarks x 3 coords)
- Better data augmentation (noise, scaling, position shift)
- Deeper neural network (four Dense blocks with batch normalization and dropout)
- Class weighting for imbalanced data
- More robust to lighting and position variations

In [None]:
# ============ STEP 1: INSTALL & SETUP ============
# Remove the NumPy that is already installed, install the pinned 1.26.4
# release, then install MediaPipe.
# NOTE(review): the pin is presumably there for binary compatibility between
# MediaPipe/TensorFlow and NumPy on Colab -- confirm before changing it.
!pip uninstall numpy -y
!pip install numpy==1.26.4 -q
!pip install mediapipe -q

# The messages below ask for a runtime restart; presumably this is so the
# freshly installed NumPy is the one that gets imported from STEP 2 onwards.
print('Installation complete!')
print('>>> RESTART RUNTIME NOW: Runtime > Restart session')
print('>>> Then run from STEP 2')

In [None]:
# ============ STEP 2: MOUNT DRIVE & SET PATHS ============
# Mount Google Drive at /content/drive; every path below lives under it.
from google.colab import drive
drive.mount('/content/drive')

# ===== CHANGE THESE PATHS TO YOUR FOLDER! =====
# Dataset root: each sub-directory is treated as one class and is scanned
# for .jpg/.jpeg/.png images.
DATA_DIR = '/content/drive/MyDrive/eth_frames/train'
# File the ModelCheckpoint callback writes the best model to; it is loaded
# back from here for evaluation.
MODEL_SAVE_PATH = '/content/drive/MyDrive/eth_landmark_v2.keras'
# JSON file that receives the class-index -> class-name mapping.
LABELS_SAVE_PATH = '/content/drive/MyDrive/eth_landmark_labels_v2.json'

print('Paths configured!')

In [None]:
# ============ STEP 3: IMPORTS ============
import os
import json
import numpy as np
import cv2
import mediapipe as mp
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Add, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt

# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Report the TensorFlow version and the GPU devices TensorFlow can see
# (an empty list means training will run on CPU).
print(f'TensorFlow: {tf.__version__}')
print(f'GPU: {tf.config.list_physical_devices("GPU")}')

In [None]:
# ============ STEP 4: CHECK DATASET ============
# List every class folder under DATA_DIR and report how many images it holds.
print('Checking dataset...')

if not os.path.exists(DATA_DIR):
    print(f'ERROR: Data folder not found: {DATA_DIR}')
else:
    # Each sub-directory is one class; sorting fixes the class-index order.
    classes = sorted(
        entry for entry in os.listdir(DATA_DIR)
        if os.path.isdir(os.path.join(DATA_DIR, entry))
    )
    print(f'Found {len(classes)} classes:')

    class_counts = {}
    total_images = 0
    for i, cls in enumerate(classes):
        cls_path = os.path.join(DATA_DIR, cls)
        images = [
            fname for fname in os.listdir(cls_path)
            if fname.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]
        class_counts[cls] = len(images)
        total_images += class_counts[cls]
        print(f'  {i}: {cls} - {class_counts[cls]} images')

    print(f'Total images: {total_images}')

In [None]:
# ============ STEP 5: EXTRACT TWO-HAND LANDMARKS ============
def extract_two_hand_landmarks(image_path, hands):
    """
    Extract landmarks from BOTH hands of a single image.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        hands: An open MediaPipe ``Hands`` instance used to run detection.

    Returns:
        A 1-D array of 126 floats laid out as
        ``[left_hand(63), right_hand(63)]``, each hand being
        21 landmarks x (x, y, z). A hand that was not detected is filled with
        zeros. Returns ``None`` when the image cannot be read or when no hand
        is detected at all.

    Notes:
        MediaPipe can assign the same handedness label to both detected
        hands. Instead of letting the second detection overwrite the first
        (which silently drops a hand), the most confident detection keeps its
        labelled slot and the other one is placed in the remaining free slot.
        NOTE(review): inference code must use the same slotting rule.
        NOTE(review): MediaPipe documents that handedness assumes a mirrored
        (selfie) image -- confirm this matches how the frames were captured.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None

    # OpenCV loads BGR, MediaPipe expects RGB.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if not (results.multi_hand_landmarks and results.multi_handedness):
        return None

    # Slot 0 holds the left hand, slot 1 the right hand; None = not detected.
    slots = [None, None]

    # Handle the most confident handedness classification first so that it
    # wins its labelled slot if two detections claim the same side.
    detections = sorted(
        zip(results.multi_hand_landmarks, results.multi_handedness),
        key=lambda pair: pair[1].classification[0].score,
        reverse=True,
    )

    for hand_landmarks, handedness in detections:
        coords = np.array(
            [value for lm in hand_landmarks.landmark for value in (lm.x, lm.y, lm.z)]
        )
        preferred = 0 if handedness.classification[0].label == 'Left' else 1
        if slots[preferred] is None:
            slots[preferred] = coords
        elif slots[1 - preferred] is None:
            # Same label reported twice: keep the hand instead of dropping it.
            slots[1 - preferred] = coords

    left_hand, right_hand = (
        slot if slot is not None else np.zeros(63)  # 21 landmarks x 3 coords
        for slot in slots
    )

    # Concatenate both hands: [left_hand(63) + right_hand(63)] = 126 features
    return np.concatenate([left_hand, right_hand])

# Extract landmarks from all images
# Walks every class folder, runs two-hand landmark extraction on each image
# and collects the feature vectors (X_data) with their class indices (y_data).
print('Extracting TWO-HAND landmarks from images...')
print('This may take several minutes...\n')

X_data = []
y_data = []
failed_images = []  # '<class>/<file>' entries that were unreadable or had no hands

with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,  # IMPORTANT: Detect up to 2 hands!
    min_detection_confidence=0.5
) as hands:

    for class_idx, class_name in enumerate(tqdm(classes, desc='Processing classes')):
        class_path = os.path.join(DATA_DIR, class_name)
        # os.listdir returns entries in arbitrary order. Sorting keeps the
        # sample order stable across runs, which is what makes the seeded
        # train/test split later on actually reproducible.
        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        for img_name in images:
            img_path = os.path.join(class_path, img_name)
            landmarks = extract_two_hand_landmarks(img_path, hands)

            if landmarks is not None:
                X_data.append(landmarks)
                y_data.append(class_idx)
            else:
                failed_images.append(f'{class_name}/{img_name}')

X_data = np.array(X_data)
y_data = np.array(y_data)

print(f'\nExtraction complete!')
print(f'Total samples: {len(X_data)}')
print(f'Failed (no hands): {len(failed_images)}')
print(f'Feature shape: {X_data.shape} (should be [N, 126])')

In [None]:
# ============ STEP 6: VISUALIZE SAMPLE ============
# Draw the detected hand landmarks on one image from each of the first
# eight classes, laid out on a 2 x 4 grid.
print('Sample images with detected landmarks:')
fig, axes = plt.subplots(2, 4, figsize=(16, 8))

with mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5) as hands:
    sample_idx = 0  # next free panel in the grid
    for class_name in classes[:8]:
        class_path = os.path.join(DATA_DIR, class_name)
        # Sorted so the displayed sample is the same on every run.
        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        if not images:
            continue

        img_path = os.path.join(class_path, images[0])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable file with None; cvtColor
            # would raise on it, so skip this class instead of crashing.
            print(f'WARNING: could not read {img_path}; skipping')
            continue

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)
        detected = results.multi_hand_landmarks or []

        for hand_landmarks in detected:
            mp_drawing.draw_landmarks(image_rgb, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        ax = axes[sample_idx // 4, sample_idx % 4]
        ax.imshow(image_rgb)
        num_hands = len(detected)
        ax.set_title(f'{class_name} ({num_hands} hands)')
        ax.axis('off')
        sample_idx += 1

# Blank out panels that never received an image (fewer than eight classes,
# empty class folders or unreadable files).
for unused_ax in axes.ravel()[sample_idx:]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# ============ STEP 7: PREPARE DATA ============
# Stratified 80/20 split, one-hot labels and per-class loss weights.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42, stratify=y_data
)

num_classes = len(classes)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Compute class weights for imbalanced data.
# compute_class_weight returns one weight per entry of `present_classes`, in
# that order. A class can be missing from y_train (e.g. no hand was detected
# in any of its images), so the weights must be keyed by the actual class
# index rather than by position.
present_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train)

# Start with a neutral weight for every class index so the dict always
# covers 0..num_classes-1, then overwrite the classes that have samples.
class_weight_dict = {class_idx: 1.0 for class_idx in range(num_classes)}
class_weight_dict.update(
    {int(class_idx): float(weight) for class_idx, weight in zip(present_classes, class_weights)}
)

print(f'Training samples: {len(X_train)}')
print(f'Test samples: {len(X_test)}')
print(f'Number of classes: {num_classes}')
print(f'Input shape: 126 features (2 hands x 21 landmarks x 3 coords)')

In [None]:
# ============ STEP 8: ADVANCED DATA AUGMENTATION ============
def _present_hand_mask(landmarks, hand_size):
    """Return a 0/1 float mask that is 1 for coordinates of detected hands.

    The vector is read as consecutive chunks of ``hand_size`` values; a chunk
    that is entirely zero is treated as a hand that was not detected. If the
    vector is not 1-D or cannot be split into whole chunks, everything is
    treated as present.
    """
    if landmarks.ndim != 1 or hand_size <= 0 or landmarks.size % hand_size != 0:
        return np.ones(landmarks.shape)
    present = np.any(landmarks.reshape(-1, hand_size) != 0, axis=1)
    return np.repeat(present, hand_size).astype(float)


def augment_landmarks_advanced(landmarks, num_augmented=8, hand_size=63):
    """
    Advanced augmentation for two-hand landmarks.

    Each round produces three independent variants of the input: Gaussian
    noise (sigma 0.015), uniform scaling by a factor in [0.9, 1.1], and a
    shift of the x and y coordinates by up to +/-0.1 (z is left untouched).

    A hand that is all zeros marks "hand not detected". Noise and shift are
    only applied to detected hands, so that marker stays exactly zero and a
    missing hand is not turned into a point or noise cloud.

    Args:
        landmarks: 1-D feature vector of flattened (x, y, z) triples.
        num_augmented: Number of augmentation rounds.
        hand_size: Number of values per hand (21 landmarks x 3 coords = 63).

    Returns:
        A list of ``1 + 3 * num_augmented`` arrays: the input first, followed
        by (noise, scale, shift) variants for every round.
    """
    # Work in floating point so the in-place shift below is always valid.
    landmarks = np.asarray(landmarks, dtype=float)
    mask = _present_hand_mask(landmarks, hand_size)

    augmented = [landmarks]

    for _ in range(num_augmented):
        # 1. Add random noise (detected hands only)
        noise = np.random.normal(0, 0.015, landmarks.shape)
        augmented.append(landmarks + noise * mask)

        # 2. Scale (simulate distance); zeros of a missing hand stay zero
        scale = np.random.uniform(0.9, 1.1)
        augmented.append(landmarks * scale)

        # 3. Shift position (detected hands only)
        shift_x = np.random.uniform(-0.1, 0.1)
        shift_y = np.random.uniform(-0.1, 0.1)
        shifted = landmarks.copy()
        # x coordinates are every 3rd value starting from 0
        shifted[0::3] += shift_x * mask[0::3]
        # y coordinates are every 3rd value starting from 1
        shifted[1::3] += shift_y * mask[1::3]
        augmented.append(shifted)

    return augmented

# Expand every training sample into itself plus its augmented variants and
# one-hot encode the correspondingly repeated labels.
print('Augmenting training data...')
aug_features = []
aug_labels = []

progress = tqdm(zip(X_train, y_train), total=len(X_train), desc='Augmenting')
for sample, label in progress:
    variants = augment_landmarks_advanced(sample, num_augmented=5)
    aug_features += variants
    # Every variant keeps the class of the sample it was derived from.
    aug_labels += [label] * len(variants)

X_train_aug = np.array(aug_features)
y_train_aug = to_categorical(np.array(aug_labels), num_classes)

print(f'Original training samples: {len(X_train)}')
print(f'Augmented training samples: {len(X_train_aug)}')

In [None]:
# ============ STEP 9: BUILD IMPROVED MODEL ============
def build_landmark_model(input_shape=126, num_classes=20,
                         hidden_layers=((512, 0.4), (256, 0.3), (128, 0.3), (64, 0.2))):
    """
    Build the landmark classifier: a plain feed-forward network.

    Every hidden block is Dense(relu) -> BatchNormalization -> Dropout and
    the blocks are chained sequentially. There are no residual/skip
    connections in this architecture.

    Args:
        input_shape: Number of input features (126 for two hands).
        num_classes: Size of the softmax output layer.
        hidden_layers: Sequence of ``(units, dropout_rate)`` pairs, one per
            hidden block, in order. The default reproduces the original
            512/256/128/64 stack.

    Returns:
        An uncompiled Keras ``Model`` mapping ``(input_shape,)`` feature
        vectors to ``num_classes`` class probabilities.
    """
    inputs = Input(shape=(input_shape,))

    x = inputs
    for units, dropout_rate in hidden_layers:
        x = Dense(units, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

    # Output
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs, outputs)

# Instantiate the classifier for the 126-feature two-hand input, with one
# output unit per class folder found in the dataset.
model = build_landmark_model(input_shape=126, num_classes=num_classes)

# Labels are one-hot encoded with to_categorical, hence the categorical
# (not sparse) cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print the layer-by-layer summary and the total parameter count.
model.summary()
print(f'\nTotal parameters: {model.count_params():,}')

In [None]:
# ============ STEP 10: TRAIN MODEL ============
# Train on the augmented training set with per-class loss weights.
callbacks = [
    # Write the model to MODEL_SAVE_PATH only when val_accuracy improves.
    ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True, verbose=1),
    # Stop after 25 epochs without a val_accuracy improvement and roll the
    # in-memory model back to its best weights.
    EarlyStopping(monitor='val_accuracy', patience=25, restore_best_weights=True, verbose=1),
    # Halve the learning rate after 10 epochs without a val_loss improvement,
    # never going below 1e-6.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1)
]

print('Training model with class weights...')
# NOTE(review): validation_data is the held-out test split (X_test), the same
# data STEP 12 reports the final accuracy on. Checkpoint selection and early
# stopping therefore see the test set, so the reported test accuracy is likely
# optimistic. Consider carving a separate validation split out of X_train
# (before augmentation) and keeping X_test for the final evaluation only.
# class_weight_dict was computed on the un-augmented y_train; augmentation
# adds the same number of variants to every sample, so class proportions
# are unchanged.
history = model.fit(
    X_train_aug, y_train_aug,
    validation_data=(X_test, y_test_cat),
    epochs=150,
    batch_size=32,
    callbacks=callbacks,
    class_weight=class_weight_dict,
    verbose=1
)

In [None]:
# ============ STEP 11: PLOT TRAINING HISTORY ============
# Side-by-side curves of training vs. validation accuracy and loss per epoch.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# (axis, history key, panel title, y-axis label)
panels = (
    (ax1, 'accuracy', 'Model Accuracy', 'Accuracy'),
    (ax2, 'loss', 'Model Loss', 'Loss'),
)

for axis, metric, title, y_label in panels:
    axis.plot(history.history[metric], label='Train', linewidth=2)
    # Keras stores the validation series under the 'val_' prefixed key.
    axis.plot(history.history[f'val_{metric}'], label='Validation', linewidth=2)
    axis.set_title(title, fontsize=14)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# ============ STEP 12: EVALUATE MODEL ============
# Reload the best checkpoint and report accuracy, per-class metrics and the
# confusion matrix on the test split.
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

best_model = keras.models.load_model(MODEL_SAVE_PATH)
test_loss, test_acc = best_model.evaluate(X_test, y_test_cat, verbose=0)

print(f'\nTest Accuracy: {test_acc*100:.2f}%')
print(f'Test Loss: {test_loss:.4f}')

y_pred = best_model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)

# Pin the label set to every class index. Without it scikit-learn only uses
# the labels that occur in y_test / predictions, so a class with no test
# samples would make target_names the wrong length and shift the heatmap
# tick labels against the matrix rows/columns.
all_labels = np.arange(len(classes))

print('\nClassification Report:')
print(classification_report(
    y_test, y_pred_classes,
    labels=all_labels,
    target_names=classes,
    zero_division=0,  # classes without samples report 0 instead of warning
))

# Confusion matrix
cm = confusion_matrix(y_test, y_pred_classes, labels=all_labels)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
plt.show()

In [None]:
# ============ STEP 13: SAVE LABELS ============
# Persist the class-index -> class-name mapping next to the model. Keys are
# strings because JSON object keys are always strings.
labels_dict = {str(position): name for position, name in enumerate(classes)}

# ensure_ascii=False keeps non-ASCII class names readable in the file.
serialized_labels = json.dumps(labels_dict, ensure_ascii=False, indent=2)
with open(LABELS_SAVE_PATH, 'w', encoding='utf-8') as f:
    f.write(serialized_labels)

print(f'Labels saved to: {LABELS_SAVE_PATH}')
print(f'\nLabel mapping:')
for idx, label in labels_dict.items():
    print(f'  {idx}: {label}')

In [None]:
# ============ STEP 14: DOWNLOAD FILES ============
# Print a closing summary, then hand both artifacts to the browser.
banner = '='*60
print(banner)
print('TRAINING COMPLETE!')
print(banner)

summary_lines = (
    f'\nModel: {MODEL_SAVE_PATH}',
    f'Labels: {LABELS_SAVE_PATH}',
    f'\nTest Accuracy: {test_acc*100:.2f}%',
    f'Features: 126 (2 hands x 21 landmarks x 3 coords)',
)
for line in summary_lines:
    print(line)

from google.colab import files
# Model first, then the label mapping.
for artifact_path in (MODEL_SAVE_PATH, LABELS_SAVE_PATH):
    files.download(artifact_path)