# Age and Gender Detection

## 1. Dataset Loading

### 1.1 Load Dataset & Inspect CSV

In [1]:
import pandas as pd

# Dataset locations: the label CSV and the directory image files are read from.
CSV_PATH = "/kaggle/input/ageutk/ageutk_full.csv"
IMG_DIR = "/kaggle/input/ageutk/UTKFace/UTKFace/UTKFace"

# Read the label table, then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)
df.head(n=5)

Unnamed: 0,files,age,gender
0,26_0_2_20170104023102422.jpg.chip.jpg,26,0
1,22_1_1_20170112233644761.jpg.chip.jpg,22,1
2,21_1_3_20170105003215901.jpg.chip.jpg,21,1
3,28_0_0_20170117180555824.jpg.chip.jpg,28,0
4,17_1_4_20170103222931966.jpg.chip.jpg,17,1


## 2. Basic Dataset Validation

### 2.1 Check size, null values, and gender distribution

In [4]:
# Row count, per-column missing-value counts, and the gender class balance.
print(f"Total samples: {len(df)}")
print(df.isna().sum())
print(df['gender'].value_counts())

Total samples: 23168
files     0
age       0
gender    0
dtype: int64
gender
0    12208
1    10960
Name: count, dtype: int64


### 2.2 Age Sanity Checks


In [7]:
# Report the range of the age labels to spot out-of-range values.
print(f"Min age: {df['age'].min()}")
print(f"Max age: {df['age'].max()}")

Min age: 1
Max age: 80


## 3. Image Loading & Preprocessing

### 3.1 Image Loader Function

In [8]:
IMG_SIZE = 128

def load_and_preprocess(img_name):
    """Load one image from ``IMG_DIR`` and return it as a normalized RGB array.

    Relies on the module-level names ``os`` and ``cv2`` being imported before
    the first call; this notebook imports them in the dataset-building cell.

    Args:
        img_name: File name of the image, relative to ``IMG_DIR``.

    Returns:
        A float32 array of shape ``(IMG_SIZE, IMG_SIZE, 3)`` in RGB channel
        order with values scaled to [0, 1], or ``None`` when OpenCV could not
        read the file.
    """
    img_path = os.path.join(IMG_DIR, img_name)
    img = cv2.imread(img_path)

    # cv2.imread returns None instead of raising for a missing/unreadable
    # file; pass that on so the caller can skip the sample.
    if img is None:
        return None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # Scale in float32: dividing the uint8 array directly would promote it to
    # float64 and double the memory held per image, while the dataset is
    # converted to float32 downstream anyway.
    return img.astype("float32") / 255.0

### 3.2 Build Clean Dataset

In [12]:
from tqdm import tqdm
import os
import cv2 

images = []
ages = []
genders = []
skipped_files = []  # names of images that load_and_preprocess could not read

# Walk the three needed columns in lockstep; this avoids building a Series
# per row as iterrows() does, while yielding the same values in the same order.
for row_file, row_age, row_gender in tqdm(
    zip(df['files'], df['age'], df['gender']), total=len(df)
):
    img = load_and_preprocess(row_file)

    # Keep images and labels aligned: a sample is kept only if its image loaded.
    if img is None:
        skipped_files.append(row_file)
        continue

    images.append(img)
    ages.append(row_age)
    genders.append(row_gender)

# Surface dropped samples instead of discarding them silently.
if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable image(s), e.g. {skipped_files[:5]}")


100%|██████████| 23168/23168 [02:41<00:00, 143.15it/s]


### 3.3 Convert to Numpy

In [13]:
import numpy as np

# Stack the per-sample Python lists into float32 arrays for training.
X = np.asarray(images, dtype="float32")
y_age = np.asarray(ages, dtype="float32")
y_gender = np.asarray(genders, dtype="float32")


### 3.4 Final Sanity Check

In [14]:
# The leading dimension of all three arrays should match (one label pair per image).
print(f"Images shape: {X.shape}")
print(f"Age labels shape: {y_age.shape}")
print(f"Gender labels shape: {y_gender.shape}")

Images shape: (23168, 128, 128, 3)
Age labels shape: (23168,)
Gender labels shape: (23168,)


## 4. Train / Validation Split

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. Passing the images and both
# label arrays in one call keeps them shuffled with the same permutation;
# the fixed random_state makes the split repeatable.
(X_train, X_val,
 age_train, age_val,
 gender_train, gender_val) = train_test_split(
    X, y_age, y_gender, test_size=0.2, random_state=42
)

print(f"Train: {X_train.shape}")
print(f"Validation: {X_val.shape}")

Train: (18534, 128, 128, 3)
Validation: (4634, 128, 128, 3)


## 5. Model Design

### 5.1 Import Deep Learning Libraries

In [21]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, GlobalAveragePooling2D,
    Dense, Dropout
)
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2

### 5.2 Load Pretrained Backbone (MobileNetV2)

In [22]:
IMG_SIZE = 128

# ImageNet-pretrained MobileNetV2 without its classification top, used as a
# feature extractor for IMG_SIZE x IMG_SIZE RGB inputs.
# NOTE(review): the Keras documentation says MobileNetV2 expects inputs
# preprocessed with `mobilenet_v2.preprocess_input` (pixels scaled to [-1, 1]),
# whereas `load_and_preprocess` in this notebook scales pixels to [0, 1].
# Confirm whether the input scaling should be aligned with the backbone.
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Freeze the backbone so that only the layers added on top of it are trained.
base_model.trainable = False

2025-12-26 06:54:00.275686: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


### 5.3 Build the Dual-Head Architecture

In [23]:
# Shared trunk: pool the backbone feature map to a vector, then a small
# dense layer with dropout feeding both heads.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(units=128, activation="relu")(x)
x = Dropout(rate=0.3)(x)

# Two task heads on the same features: a linear unit for age regression and
# a sigmoid unit for binary gender classification.
age_output = Dense(units=1, name="age")(x)
gender_output = Dense(units=1, activation="sigmoid", name="gender")(x)

### 5.4 Create and Compile the Model

In [25]:
# One input (the backbone's) and two outputs, in the order [age, gender].
model = Model(inputs=base_model.input, outputs=[age_output, gender_output])

# Losses and metrics are keyed by the output layer names "age" and "gender".
# NOTE(review): no loss_weights are given, so the two losses are combined
# unweighted; with age labels spanning 1-80 the MSE term may be much larger
# than the binary cross-entropy term. Confirm whether weighting is wanted.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=dict(age="mse", gender="binary_crossentropy"),
    metrics=dict(age="mae", gender="accuracy"),
)
model.summary()