In [6]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

image_dir = "UTKFace/UTKFace"  # This is the correct path

data = []
age_labels = []
gender_labels = []

for filename in os.listdir(image_dir):
    try:
        if not filename.endswith(".jpg"):
            continue

        age, gender, *_ = filename.split("_")
        img_path = os.path.join(image_dir, filename)
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.resize(img, (64, 64))

        data.append(img)
        age_labels.append(int(age))
        gender_labels.append(int(gender))
    except Exception as e:
        print(f"Skipping {filename}: {e}")
        continue

print(f"✅ Total valid images loaded: {len(data)}")

X = np.array(data) / 255.0
y_age = np.array(age_labels)
y_gender = np.array(gender_labels)

# Split into train/test
X_train, X_test, y_age_train, y_age_test, y_gender_train, y_gender_test = train_test_split(
    X, y_age, y_gender, test_size=0.2, random_state=42
)

print("✅ Data preprocessed and ready for model training.")


✅ Total valid images loaded: 23708
✅ Data preprocessed and ready for model training.


In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# Input layer
inputs = layers.Input(shape=(64, 64, 3))

# Shared CNN Base
x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
x = layers.MaxPooling2D(2)(x)
x = layers.BatchNormalization()(x)

x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.BatchNormalization()(x)

x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.BatchNormalization()(x)

x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)

# Output heads
gender_output = layers.Dense(1, activation='sigmoid', name='gender_output')(x)
age_output = layers.Dense(1, activation='linear', name='age_output')(x)

# Define model
model = models.Model(inputs=inputs, outputs=[gender_output, age_output])

# Compile with multi-task loss
model.compile(
    optimizer='adam',
    loss={
        'gender_output': 'binary_crossentropy',
        'age_output': 'mae'
    },
    metrics={
        'gender_output': 'accuracy',
        'age_output': 'mae'
    }
)

model.summary()

In [9]:
early_stop = callbacks.EarlyStopping(patience=5, restore_best_weights=True)
lr_reduce = callbacks.ReduceLROnPlateau(factor=0.5, patience=3, verbose=1)

history = model.fit(
    X_train,
    {'gender_output': y_gender_train, 'age_output': y_age_train},
    validation_split=0.1,
    epochs=30,
    batch_size=64,
    callbacks=[early_stop, lr_reduce]
)

Epoch 1/30
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 182ms/step - age_output_loss: 13.0346 - age_output_mae: 13.0347 - gender_output_accuracy: 0.7316 - gender_output_loss: 0.8089 - loss: 13.8435 - val_age_output_loss: 12.3720 - val_age_output_mae: 12.3636 - val_gender_output_accuracy: 0.6431 - val_gender_output_loss: 0.6388 - val_loss: 13.0031 - learning_rate: 0.0010
Epoch 2/30
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 159ms/step - age_output_loss: 8.4426 - age_output_mae: 8.4426 - gender_output_accuracy: 0.8342 - gender_output_loss: 0.3810 - loss: 8.8236 - val_age_output_loss: 9.5210 - val_age_output_mae: 9.5060 - val_gender_output_accuracy: 0.8556 - val_gender_output_loss: 0.3208 - val_loss: 9.8273 - learning_rate: 0.0010
Epoch 3/30
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 158ms/step - age_output_loss: 7.7939 - age_output_mae: 7.7940 - gender_output_accuracy: 0.8560 - gender_output_loss: 0.3248 - loss: 8.1187

In [10]:
results = model.evaluate(
    X_test,
    {'gender_output': y_gender_test, 'age_output': y_age_test}
)

print("\nTest Results:")
for name, value in zip(model.metrics_names, results):
    print(f"{name}: {value:.4f}")

[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - age_output_loss: 6.1011 - age_output_mae: 6.1012 - gender_output_accuracy: 0.9009 - gender_output_loss: 0.3394 - loss: 6.4406

Test Results:
loss: 6.4900
compile_metrics: 0.3182
gender_output_loss: 6.1637
age_output_loss: 6.1702


In [11]:
import numpy as np

# Predict gender and age on test data
gender_pred_prob, age_pred = model.predict(X_test)

# Since gender is sigmoid output, convert to binary labels
gender_pred = (gender_pred_prob > 0.5).astype(int).reshape(-1)


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step


In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

print("Gender Classification Metrics:")

cm = confusion_matrix(y_gender_test, gender_pred)
print("Confusion Matrix:\n", cm)

acc = accuracy_score(y_gender_test, gender_pred)
prec = precision_score(y_gender_test, gender_pred)
rec = recall_score(y_gender_test, gender_pred)
f1 = f1_score(y_gender_test, gender_pred)

print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1 Score: {f1:.4f}")


Gender Classification Metrics:
Confusion Matrix:
 [[2252  233]
 [ 216 2041]]
Accuracy: 0.9053
Precision: 0.8975
Recall: 0.9043
F1 Score: 0.9009


In [13]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

print("\nAge Regression Metrics:")

r2 = r2_score(y_age_test, age_pred)
mae = mean_absolute_error(y_age_test, age_pred)
mse = mean_squared_error(y_age_test, age_pred)

print(f"R² Score: {r2:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")



Age Regression Metrics:
R² Score: 0.8115
MAE: 6.1702
MSE: 74.8102


In [14]:
model.save('age_gender_model2.keras', include_optimizer=False)


In [15]:
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError

model = load_model('age_gender_model1.keras', compile=False)

model.compile(
    optimizer='adam',
    loss={
        'gender_output': 'binary_crossentropy',
        'age_output': MeanAbsoluteError()
    },
    metrics={
        'gender_output': 'accuracy',
        'age_output': MeanAbsoluteError()
    }
)


In [16]:
# import cv2
# import numpy as np
# from tensorflow.keras.models import load_model
# from tensorflow.keras.losses import MeanAbsoluteError

# # Load the trained model and compile explicitly
# model = load_model('age_gender_model2.keras', compile=False)
# model.compile(
#     optimizer='adam',
#     loss={
#         'gender_output': 'binary_crossentropy',
#         'age_output': MeanAbsoluteError()
#     },
#     metrics={
#         'gender_output': 'accuracy',
#         'age_output': MeanAbsoluteError()
#     }
# )

# # Load Haar Cascade for face detection
# face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# def preprocess_face(face_img):
#     """Resize, normalize, and reshape face image for prediction."""
#     face_img = cv2.resize(face_img, (64, 64))
#     face_img = face_img.astype('float32') / 255.0
#     face_img = np.expand_dims(face_img, axis=0)  # Shape: (1, 64, 64, 3)
#     return face_img

# # Start webcam capture
# cap = cv2.VideoCapture(0)

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # Convert to grayscale for face detection
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

#     # Detect faces
#     faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

#     for (x, y, w, h) in faces:
#         # Extract the face ROI from original frame
#         face_img = frame[y:y+h, x:x+w]

#         # Preprocess for model
#         input_img = preprocess_face(face_img)

#         # Predict gender and age
#         gender_pred_prob, age_pred = model.predict(input_img)
#         gender_label = "Female" if gender_pred_prob[0][0] > 0.5 else "Male"
#         age_label = int(age_pred[0][0])

#         # Draw bounding box on the face
#         cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

#         # Text position above the box
#         text = f"{gender_label}, Age: {age_label}"
#         (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
#         cv2.rectangle(frame, (x, y - text_height - 10), (x + text_width, y), (0, 255, 0), -1)  # Filled box for text
#         cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

#     # Display the frame
#     cv2.imshow('Age & Gender Prediction', frame)

#     # Press 'q' to quit
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # Cleanup
# cap.release()
# cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [17]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError

# Load your trained multi-output model (.keras format)
model = load_model('age_gender_model2.keras', compile=False)
model.compile(
    optimizer='adam',
    loss={
        'gender_output': 'binary_crossentropy',
        'age_output': MeanAbsoluteError()
    },
    metrics={
        'gender_output': 'accuracy',
        'age_output': MeanAbsoluteError()
    }
)

# Haar cascade face detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def preprocess_face(face_img):
    """Resize and normalize face image for prediction."""
    face_img = cv2.resize(face_img, (64, 64))
    face_img = face_img.astype('float32') / 255.0
    face_img = np.expand_dims(face_img, axis=0)  # (1, 64, 64, 3)
    return face_img

def age_group(age):
    """Convert numeric age to categorical age group."""
    if age < 12:
        return "Kid"
    elif age < 36:
        return "Young Adult"
    elif age < 60:
        return "Middle-Aged"
    else:
        return "Senior Citizen"

# Start webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

    for (x, y, w, h) in faces:
        face_img = frame[y:y+h, x:x+w]

        input_img = preprocess_face(face_img)

        # Predict gender and age
        gender_pred_prob, age_pred = model.predict(input_img)
        gender_label = "Female" if gender_pred_prob[0][0] > 0.5 else "Male"
        age_label = age_group(int(age_pred[0][0]))

        # Draw rectangle around face
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Prepare text label
        text = f"{gender_label}, {age_label}"

        # Calculate text size and draw filled rectangle for better readability
        (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
        cv2.rectangle(frame, (x, y - text_height - 10), (x + text_width, y), (0, 255, 0), -1)

        # Put text above face box
        cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    # Show frame
    cv2.imshow('Age Group & Gender Prediction', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3