In [None]:
import kagglehub
import os
import glob
import pandas as pd

# Download (or reuse the cached copy of) the UTKFace dataset and report where it lives.
path = kagglehub.dataset_download("jangedoo/utkface-new")
print("Dataset location:", path)

# Sorted so the row order of `df` (and therefore any seeded split of it) does not
# depend on the order in which the filesystem happens to list the files.
image_paths = sorted(glob.glob(os.path.join(path, "**/*.jpg"), recursive=True))

# UTKFace file names start with "<age>_<gender>_...", so labels are parsed from the name.
data = []
seen_filenames = set()
for p in image_paths:
    filename = os.path.basename(p)

    # The recursive glob returns every copy of a file name found under `path`.
    # NOTE(review): the archive appears to ship the same images under more than one
    # folder — keeping only the first copy prevents identical pictures from landing
    # on both sides of a later train/test split.
    if filename in seen_filenames:
        continue
    seen_filenames.add(filename)

    parts = filename.split('_')
    if len(parts) < 2:
        continue

    try:
        age = int(parts[0])
        gender = int(parts[1])  # 0=Male, 1=Female
    except ValueError:
        # Name does not follow the "<age>_<gender>_" convention; skip it.
        continue

    # Reject labels that parse as integers but cannot be valid targets
    # (the gender head is trained as a binary 0/1 classifier).
    if age < 0 or gender not in (0, 1):
        continue

    data.append({"path": p, "age": age, "gender": gender})

# Explicit columns keep the frame well-formed even when nothing was collected.
df = pd.DataFrame(data, columns=["path", "age", "gender"])

if df.empty:
    raise RuntimeError(f"No usable UTKFace images were found under {path!r}")

print(f"✅ Success! Total images processed: {len(df)}")
print(df.head())

In [None]:
import numpy as np
import cv2
import random
from sklearn.model_selection import train_test_split

# Hold out 20% of the indexed images; the fixed seed makes the shuffle repeatable.
train_df, test_df = train_test_split(
    df,
    random_state=42,
    test_size=0.2,
)

def data_generator(df, batch_size=32, augment=False):
    """Endlessly yield ``(images, labels)`` batches read from disk.

    Every pass reshuffles ``df`` and walks it in slices of ``batch_size`` rows.
    Each image is loaded with OpenCV, converted BGR->RGB, resized to 200x200 and
    scaled to ``[0, 1]``. Rows whose file cannot be read are skipped, so a batch
    may hold fewer than ``batch_size`` samples; batches that end up empty are
    not yielded at all.

    Args:
        df: DataFrame with ``path``, ``age`` and ``gender`` columns.
        batch_size: Maximum number of rows per batch (must be >= 1).
        augment: When true, apply a random horizontal flip (50% chance) and a
            random rotation in ``[-10, 10]`` degrees about the image centre.

    Yields:
        A tuple ``(images, labels)`` where ``images`` is a float32 array of
        shape ``(n, 200, 200, 3)`` and ``labels`` is a dict with float32 arrays
        under ``"age_output"`` and ``"gender_output"``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``df`` has no rows
            (the endless loop would otherwise spin without ever yielding).
        RuntimeError: If a complete pass over ``df`` produced no readable image.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if len(df) == 0:
        raise ValueError("data_generator received an empty DataFrame")

    while True:
        df_shuffled = df.sample(frac=1).reset_index(drop=True)
        produced_batch = False

        for start in range(0, len(df_shuffled), batch_size):
            batch_df = df_shuffled.iloc[start:start + batch_size]
            images, ages, genders = [], [], []

            # Column-wise zip avoids the per-row Series construction of iterrows().
            for img_path, age, gender in zip(
                batch_df['path'], batch_df['age'], batch_df['gender']
            ):
                img = cv2.imread(img_path)
                if img is None:
                    # Unreadable/missing file: drop the sample, labels included.
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (200, 200))

                if augment:
                    if random.random() > 0.5:
                        img = cv2.flip(img, 1)
                    angle = random.uniform(-10, 10)
                    M = cv2.getRotationMatrix2D((100, 100), angle, 1.0)
                    img = cv2.warpAffine(img, M, (200, 200))

                # float32 halves the batch's memory footprint compared with the
                # float64 that a plain ``img / 255.0`` would produce.
                images.append(img.astype(np.float32) / 255.0)
                ages.append(age)
                genders.append(gender)

            if not images:
                # Every file in this slice failed to load; an empty batch is useless.
                continue

            produced_batch = True
            yield np.array(images, dtype='float32'), {
                "age_output": np.array(ages, dtype='float32'),
                "gender_output": np.array(genders, dtype='float32')
            }

        if not produced_batch:
            # Without this guard the generator would loop forever yielding nothing.
            raise RuntimeError("data_generator could not read any image listed in df")

# Training batches are augmented; the held-out batches are left untouched.
# Both generators use the default batch size of 32.
train_gen = data_generator(df=train_df, batch_size=32, augment=True)
test_gen = data_generator(df=test_df, batch_size=32, augment=False)

print("✅ Augmented Pipeline Ready!")

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam 

def build_model():
    """Build the two-headed CNN that predicts age and gender from a face crop.

    The network takes a 200x200 RGB image, runs it through three
    convolution + max-pooling stages (32, 64 and 128 filters; batch
    normalisation after the first convolution only), flattens the feature map
    and feeds a shared 256-unit dense layer with 50% dropout into two heads.

    Returns:
        An uncompiled Keras ``Model`` whose outputs are, in order,
        ``age_output`` (one non-negative regression value) and
        ``gender_output`` (one sigmoid probability).
    """
    inputs = Input(shape=(200, 200, 3))

    # Convolutional feature extractor.
    x = Conv2D(32, (3, 3), activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

    x = Flatten()(x)

    # Shared representation used by both heads.
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    gender_out = Dense(1, activation='sigmoid', name='gender_output')(x)

    # A ReLU on a single regression unit has zero gradient whenever its
    # pre-activation is negative, so the head can get stuck predicting 0 for
    # every input. Softplus keeps the output non-negative but always passes a
    # gradient, and behaves like the identity for large values.
    age_out = Dense(1, activation='softplus', name='age_output')(x)

    model = Model(inputs=inputs, outputs=[age_out, gender_out])
    return model

# Instantiate the two-headed network.
model = build_model()

# Adam with a learning rate of 1e-4.
custom_optimizer = Adam(learning_rate=1e-4)

# One loss and one metric per named output head.
model.compile(
    loss={
        'age_output': 'mse',
        'gender_output': 'binary_crossentropy',
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
    },
    optimizer=custom_optimizer,
)

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has not improved for 5 epochs (rolling back to the best
# weights) and keep only the best checkpoint on disk.
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    ),
    ModelCheckpoint(
        'best_multi_task_model.keras',
        save_best_only=True,
    ),
]

# The generators never terminate, so the number of batches per epoch is given
# explicitly (row count divided by the batch size of 32).
history = model.fit(
    train_gen,
    epochs=20,
    steps_per_epoch=len(train_df) // 32,
    validation_data=test_gen,
    validation_steps=len(test_df) // 32,
    callbacks=callbacks,
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Pull one batch from the held-out generator and run both heads on it.
test_images, test_labels = next(test_gen)

predictions = model.predict(test_images)
pred_ages = predictions[0]      # first model output: age_output
pred_genders = predictions[1]   # second model output: gender_output

# The generator can return fewer samples than a full batch (last slice of a pass,
# or unreadable files that were skipped), so never index past what was received.
n_show = min(10, len(test_images))

plt.figure(figsize=(20, 10))
for i in range(n_show):
    plt.subplot(2, 5, i+1)
    plt.imshow(test_images[i])

    actual_g = "Male" if test_labels['gender_output'][i] == 0 else "Female"
    # Index the scalar explicitly instead of relying on the truth value of a
    # one-element array.
    pred_g = "Male" if pred_genders[i][0] < 0.5 else "Female"

    actual_a = int(test_labels['age_output'][i])
    pred_a = int(pred_ages[i][0])

    plt.title(f"Actual: {actual_a}, {actual_g}\nPred: {pred_a}, {pred_g}")
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Persist the trained model in the Keras format under the Kaggle working directory.
model.save(
    '/kaggle/working/final_augmented_age_gender_model.keras'
)
print("✅ Model saved in /kaggle/working/")

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

def predict_my_face(img_path):
    """Detect a face in an image file and plot the model's age/gender prediction.

    The image is read with OpenCV, the largest frontal face found by the Haar
    cascade is cropped, resized to 200x200, scaled to ``[0, 1]`` and passed to
    the module-level ``model``. The crop is shown with the predicted age,
    gender and the gender confidence as the title.

    Args:
        img_path: Path of the image file to analyse.

    Returns:
        None. A message is printed (and nothing is plotted) when the file
        cannot be read or no face is detected.

    Raises:
        RuntimeError: If the Haar cascade definition could not be loaded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f"❌ Could not read image: {img_path}")
        return

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Haar cascades work on intensity; derive grayscale from the original BGR
    # data so the channel weights are applied to the right channels.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        raise RuntimeError("Could not load haarcascade_frontalface_default.xml")
    faces = face_cascade.detectMultiScale(img_gray, 1.3, 5)

    if len(faces) == 0:
        print("❌ No face detected! Try a clearer photo with better lighting.")
        return

    # With several detections, take the one covering the largest area instead
    # of whichever happens to be listed first.
    (x, y, w, h) = max(faces, key=lambda box: box[2] * box[3])
    face_crop = img_rgb[y:y+h, x:x+w]

    # Same preprocessing as the training pipeline: 200x200 RGB in [0, 1].
    face_resized = cv2.resize(face_crop, (200, 200))
    face_final = face_resized / 255.0
    face_input = np.expand_dims(face_final, axis=0)

    predictions = model.predict(face_input)

    # predictions[0] is the age head, predictions[1] the gender probability.
    gender_prob = float(predictions[1][0][0])
    pred_age = int(predictions[0][0][0])
    pred_gender = "Male" if gender_prob < 0.5 else "Female"
    confidence = (1 - gender_prob) if pred_gender == "Male" else gender_prob

    plt.imshow(face_resized)
    plt.title(f"Prediction: {pred_age} years old, {pred_gender}\nConfidence: {confidence*100:.2f}%")
    plt.axis('off')
    plt.show()
