In [None]:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')
import tensorflow as tf
from keras.preprocessing.image import load_img
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input
from PIL import Image
from tensorflow.keras.utils import plot_model # type: ignore
from pathlib import Path
from tensorflow.keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split
from tensorflow.keras.initializers import random_uniform, glorot_uniform, constant, identity
from tensorflow.keras.layers import Dropout, Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.models import Model, load_model
import warnings
warnings.filterwarnings("ignore", message=".*CuDNN.*")
warnings.filterwarnings("ignore", message=".*cuFFT.*")
warnings.filterwarnings("ignore", message=".*cuBLAS.*")
import os
# from tensorflow.keras.utils import np_utils
from tensorflow.keras import utils
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import datasets, layers, models
from PIL import Image

In [None]:
# Directories scanned for face images. The loading loop walks every entry of
# this list and parses each file name as "<age>_<gender>_...".
BASE_DIRS = [r'../data/UTKFace/UTKFace']

In [None]:
# Parallel lists, one entry per accepted image file.
image_paths = []
age_labels = []
gender_paths = []   # holds gender labels (not paths); name kept because later cells read it
skipped_files = []  # file names that looked like samples but carried unusable labels

for BASE_DIR in BASE_DIRS:
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any hard-coded sample index) reproducible.
    for filename in tqdm(sorted(os.listdir(BASE_DIR))):
        temp = filename.split('_')
        if not temp[0].isdigit():
            # Not a "<age>_<gender>_..." sample (e.g. a stray system file).
            continue
        try:
            gender = int(temp[1])
        except (IndexError, ValueError):
            # Numeric age but missing / empty / non-numeric gender field.
            skipped_files.append(filename)
            continue
        if gender not in (0, 1):
            # Only 0 and 1 are valid targets for the binary gender output.
            skipped_files.append(filename)
            continue
        image_paths.append(os.path.join(BASE_DIR, filename))
        age_labels.append(int(temp[0]))
        gender_paths.append(gender)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) with malformed labels, e.g. {skipped_files[:3]}")

In [None]:
# One row per image: file path plus the age and gender parsed from its name.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_paths,
})
print(f"Dataframe: {df}")

# Display names for the numeric gender codes.
gender_dict = {0: 'Male', 1: 'Female'}



In [None]:
# Rebuild the metadata frame (path, age, gender) from the parsed lists.
df = pd.DataFrame({'image': image_paths, 'age': age_labels, 'gender': gender_paths})

# Defined unconditionally: the prediction cells look names up in this mapping
# regardless of whether the preview below succeeds.
gender_dict = {0: 'Male', 1: 'Female'}

# Debug print to check if the dataframe is populated correctly
print(f"Dataframe shape: {df.shape}")
print(f"First few rows of the dataframe: {df.head()}")

if df.empty:
    print("Dataframe is empty. No images found.")
else:
    first_image_path = df['image'].iloc[0]  # positional, independent of the index labels
    try:
        # The context manager releases the file handle once the pixels are drawn.
        with Image.open(first_image_path) as img:
            plt.imshow(img)
            plt.axis('off')
            plt.show()
    except (OSError, ValueError) as e:
        # Missing or unreadable image: report it and keep the notebook running.
        print(f"Error opening image: {e}")

In [None]:
# Histogram (30 bins) of the age labels with a KDE overlay.
sns.displot(df['age'], kde=True, bins=30)
plt.title('Age Distribution')
# No legend: the plot has a single unlabelled series, so plt.legend() had
# nothing to show.
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Preview the first 25 samples as a 5x5 grid annotated with their labels.
plt.figure(figsize=(25,25))
files = df.iloc[0:25]

for position, (path, age, gender) in enumerate(
        zip(files['image'], files['age'], files['gender']), start=1):
    plt.subplot(5, 5, position)
    # Close each file as soon as its pixels have been handed to matplotlib.
    with Image.open(path) as img:
        plt.imshow(img)
    # Fall back to the raw code if a label has no display name.
    plt.title(f"Age: {age} Gender: {gender_dict.get(gender, gender)}")
    plt.axis('off')

plt.show()

In [None]:
def extract_feature(images, size=(128, 128)):
    """Load images as grayscale, resize them and stack them into one array.

    Args:
        images: iterable of image file paths (anything ``Image.open`` accepts).
        size: target ``(width, height)`` in pixels passed to ``Image.resize``;
            defaults to the 128x128 input expected by the model.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(n_images, height, width, 1)``; values are raw 0-255 intensities
        (no normalisation is applied here).
    """
    width, height = size
    features = []
    for image in tqdm(images):
        # Context manager closes the underlying file once the pixels are read.
        with Image.open(image) as img:
            # Single-channel grayscale, then bilinear resampling to the target size.
            gray = img.convert('L').resize((width, height), Image.BILINEAR)
        features.append(np.array(gray))

    # Explicit dtype keeps the result uint8 even when `images` is empty.
    features = np.array(features, dtype=np.uint8)

    # Add the trailing channel axis expected by Conv2D inputs.
    features = features.reshape(len(features), height, width, 1)
    return features

In [None]:
# Pixel tensor for every image listed in the frame.
X = extract_feature(df['image'])

print(f"X_shape: \t{X.shape}")
# Normalise to [0, 1]. Casting to float32 first avoids the float64 array that
# `uint8 / 255.0` would create (twice the memory for no gain in training).
X = X.astype(np.float32) / 255.0

# Label vectors, row-aligned with X.
y_gender = np.array(df['gender'])
y_age = np.array(df['age'])
print("Shape of y_gender:", y_gender.shape)
print("Shape of y_age:", y_age.shape)

In [None]:
# Two-headed CNN: a shared convolutional trunk feeding separate gender and
# age heads.
input_shape = (128, 128, 1)

inputs = Input(shape=input_shape)

# Convolutional trunk: four conv -> max-pool stages, filters doubling each stage.
conv1 = Conv2D(filters=32, kernel_size=3, activation='relu')(inputs)
maxp1 = MaxPooling2D(pool_size=2)(conv1)

conv2 = Conv2D(filters=64, kernel_size=3, activation='relu')(maxp1)
maxp2 = MaxPooling2D(pool_size=2)(conv2)

conv3 = Conv2D(filters=128, kernel_size=3, activation='relu')(maxp2)
maxp3 = MaxPooling2D(pool_size=2)(conv3)

conv4 = Conv2D(filters=256, kernel_size=3, activation='relu')(maxp3)
maxp4 = MaxPooling2D(pool_size=2)(conv4)

flatten = Flatten()(maxp4)

# One 256-unit dense layer per task, both reading the same flattened features.
dense1 = Dense(units=256, activation='relu')(flatten)
dense2 = Dense(units=256, activation='relu')(flatten)

# 30% dropout on each branch.
dropout1 = Dropout(rate=0.3)(dense1)
dropout2 = Dropout(rate=0.3)(dense2)

# Heads: sigmoid output for gender, single ReLU unit for age.
output1 = Dense(units=1, activation='sigmoid', name='gender_out')(dropout1)
output2 = Dense(units=1, activation='relu', name='age_out')(dropout2)

model = Model(inputs=inputs, outputs=[output1, output2])

model.summary()


In [None]:
# Pack both targets into one (n_samples, 2) array so a single split call keeps
# gender and age aligned with their image.
y_combined = np.stack((y_gender, y_age), axis=1)

# First cut: 80% training, 20% held out.
X_train, X_temp, y_combined_train, y_combined_temp = train_test_split(
    X, y_combined, test_size=0.2, random_state=42
)

# Second cut: divide the held-out part evenly into test and validation sets.
X_test, X_val, y_combined_test, y_combined_val = train_test_split(
    X_temp, y_combined_temp, test_size=0.5, random_state=42
)

# Unpack the two label columns again (column 0 = gender, column 1 = age).
y_train_gender, y_train_age = y_combined_train.T
y_test_gender, y_test_age = y_combined_test.T
y_val_gender, y_val_age = y_combined_val.T

In [None]:
# Same Keras namespace as ModelCheckpoint / Model to avoid mixing
# `keras` and `tensorflow.keras` objects.
from tensorflow.keras.callbacks import LearningRateScheduler

model_path = './best_model.keras'

# Keep only the weights with the best validation gender accuracy seen so far.
checkpointer = ModelCheckpoint(
    filepath=model_path,
    monitor='val_gender_out_accuracy',
    verbose=1,
    mode='max',
    save_best_only=True
)

# Losses and metrics are keyed by output-layer name. A flat list is read
# differently across Keras versions for multi-output models (every metric on
# every output vs. one entry per output); the dict form is unambiguous and
# yields the `gender_out_accuracy` / `age_out_mae` history keys used below.
model.compile(
    loss={'gender_out': 'binary_crossentropy', 'age_out': 'mae'},
    optimizer='adam',
    metrics={'gender_out': 'accuracy', 'age_out': 'mae'},
)

# Exponential decay: learning rate 1e-3 at epoch 0, multiplied by 0.9 each epoch.
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.9 ** x)

In [None]:
# Sample counts: everything that was parsed, then each split.
total_images = len(image_paths)
train_images, validation_images, test_images = (
    len(split) for split in (X_train, X_val, X_test)
)

print(f"Total number of images: {total_images}")
print(f"Number of training images: {train_images}")
print(f"Number of validation images: {validation_images}")
print(f"Number of test images: {test_images}")


In [None]:
# Training model
# NOTE: the previous `tf.config.run_functions_eagerly(True)` call was removed.
# It is a debugging switch that disables graph execution and makes a 100-epoch
# fit far slower; re-enable it temporarily only when stepping through a
# training step.
history = model.fit(
    x=X_train,
    y=[y_train_gender, y_train_age],  # order matches the model outputs: gender, age
    batch_size=128,
    epochs=100,
    validation_data=(X_val, [y_val_gender, y_val_age]),
    callbacks=[annealer, checkpointer],
)

In [None]:
# Per-epoch gender accuracy on the training and validation sets, plus the
# matching epoch axis.
acc, val_acc = (
    history.history[key]
    for key in ('gender_out_accuracy', 'val_gender_out_accuracy')
)
epochs = range(len(acc))

In [None]:
# `acc` / `val_acc` hold the gender accuracy history, so label the curves as
# accuracy (they were previously captioned as loss).
plt.plot(epochs, acc, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Gender Accuracy Graph')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Gender head: training vs. validation accuracy per epoch. The plotted series
# are accuracies, so the title and y-label say so.
plt.plot(history.history['gender_out_accuracy'])
plt.plot(history.history['val_gender_out_accuracy'])
plt.title('Gender accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()


In [None]:
# Age head: training vs. validation mean absolute error per epoch.
for series_key in ('age_out_mae', 'val_age_out_mae'):
    plt.plot(history.history[series_key])
plt.gca().set(title='Age loss', ylabel='loss', xlabel='epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()


In [None]:
image_index = 3000
print(f"Original Gender: \t{gender_dict[y_gender[image_index]]} \t Original Age: \t{y_age[image_index]}")

pred = model.predict(X[image_index].reshape(1,128,128,1))
pred_gender = gender_dict[int(round(pred[0][0][0]))]  
pred_age = round(pred[1][0][0])
print(f"Predicted Gender: \t{pred_gender}\t Predicted Age \t{pred_age}")
plt.title(f'Gender: {pred_gender}, Age: {pred_age}')
plt.imshow(X[image_index].reshape(128,128), cmap='gray') 
plt.show()  


In [None]:
image_index = 50
print(f"Original Gender: \t{gender_dict[y_gender[image_index]]} \t Original Age: \t{y_age[image_index]}")

pred = model.predict(X[image_index].reshape(1,128,128,1))
pred_gender = gender_dict[int(round(pred[0][0][0]))]  
pred_age = round(pred[1][0][0])
print(f"Predicted Gender: \t{pred_gender}\t Predicted Age \t{pred_age}")
plt.title(f'Gender: {pred_gender}, Age: {pred_age}')
plt.imshow(X[image_index].reshape(128,128), cmap='gray') 
plt.show()  

In [None]:
image_index = 35
print(f"Original Gender: \t{gender_dict[y_gender[image_index]]} \t Original Age: \t{y_age[image_index]}")

pred = model.predict(X[image_index].reshape(1,128,128,1))
pred_gender = gender_dict[int(round(pred[0][0][0]))]  
pred_age = round(pred[1][0][0])
print(f"Predicted Gender: \t{pred_gender}\t Predicted Age \t{pred_age}")
plt.title(f'Gender: {pred_gender}, Age: {pred_age}')
plt.imshow(X[image_index].reshape(128,128), cmap='gray') 
plt.show()  