# Thư viện

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.api.preprocessing.image import load_img
from keras.api.models import Model
from keras.api.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, BatchNormalization, Input
from PIL import Image
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

# Xử lý dữ liệu

In [2]:
# Directory that holds the UTKFace image files; the relative path is resolved
# against the notebook's working directory.
datasetPath = "data/UTKFace"

In [3]:
# Keep only image files and sort them. os.listdir returns names in arbitrary
# order and may include stray non-image entries (e.g. ".DS_Store") whose names
# cannot be parsed as "<age>_<gender>_..." when the labels are extracted.
dataList = sorted(
    name
    for name in os.listdir(datasetPath)
    if name.lower().endswith((".jpg", ".jpeg", ".png"))
)

In [4]:
# Shuffle in place with a fixed seed. An unseeded shuffle changes the row order
# on every run, which makes the later seeded train/test split differ between
# runs even though it passes random_state.
np.random.default_rng(42).shuffle(dataList)

In [None]:
# Every file name is split on "_"; the first field is read as the age and the
# second as the gender code, both converted to int.
nameFields = [filename.split('_') for filename in tqdm(dataList)]
ages = [int(fields[0]) for fields in nameFields]
gender = [int(fields[1]) for fields in nameFields]

In [6]:
# One row per image file: file name plus the age and gender parsed from it.
df = pd.DataFrame(
    {
        "image": dataList,
        "age": ages,
        "gender": gender,
    }
)

In [None]:
# Display the DataFrame of image names with their age and gender labels.
df

In [None]:
# Print the column dtypes, non-null counts and memory usage of df.
df.info()

In [None]:
# Summary statistics for the numeric columns (age and gender).
df.describe()

In [9]:
# Lookup table from the integer gender code to the label shown to the user.
genderDict = {
    0: "Nam",
    1: "Nữ",
}

In [None]:
# Show the dtype of each column in df.
df.dtypes

In [None]:
# Preview the image referenced by the first row of df as a quick sanity check.
firstImageName = df["image"].iloc[0]
img = Image.open(f"{datasetPath}/{firstImageName}")
plt.imshow(img)

In [12]:
# Split the rows 80% / 20% into training and test sets; random_state fixes the
# split for a given row order of df.
trainDataset, testDataset = train_test_split(df, train_size=0.8, random_state=42)

In [None]:
# Display the training split.
trainDataset

In [None]:
# Display the test split.
testDataset

In [None]:
# Number of distinct values per column in the full dataset.
df.nunique()

In [None]:
# Number of distinct values per column in the training split.
trainDataset.nunique()

In [None]:
# Number of distinct values per column in the test split.
testDataset.nunique()

# Trích xuất thuộc tính

In [None]:
# Load every training image, resize it to 128x128 with LANCZOS resampling and
# write it straight into a pre-allocated uint8 buffer. Filling the buffer in
# place avoids holding a Python list of per-image arrays and the stacked copy
# in memory at the same time.
xTrain = np.empty((len(trainDataset), 128, 128, 3), dtype=np.uint8)
for i, image in enumerate(tqdm(trainDataset['image'])):
    img = load_img(f"{datasetPath}/{image}")
    img = img.resize((128, 128), Image.Resampling.LANCZOS)
    # Assignment fails loudly if an image does not decode to (128, 128, 3).
    xTrain[i] = np.asarray(img)

In [None]:
# Display the stacked training image array.
xTrain

## Chuẩn hóa

In [None]:
# Divide the pixel values by 255 and store them as float32.
# The integer-dtype guard makes this cell safe to re-run: without it a second
# execution would divide the already scaled array by 255 again.
# float32 needs half the memory of the float64 array that `xTrain/255` yields.
if np.issubdtype(xTrain.dtype, np.integer):
    xTrain = xTrain.astype(np.float32) / 255

In [20]:
# Training targets as NumPy arrays: one array per model output.
yGender, yAge = (np.array(trainDataset[column]) for column in ("gender", "age"))

# Mô hình CNN

In [None]:
# Shared convolutional trunk. `inputSize` is the symbolic input tensor for
# 128x128 images with 3 channels (despite its name it is not a size).
# Three Conv2D stages with 64, 128 and 256 filters follow, each with max
# pooling; batch normalization is applied after the first convolution only.
inputSize = Input(shape=(128, 128, 3))
M = Conv2D(64, (3, 3), activation='relu')(inputSize)
M = BatchNormalization(axis=3)(M)
M = MaxPooling2D((3, 3))(M)
M = Conv2D(128, (3, 3), activation='relu')(M)
M = MaxPooling2D(strides=(2, 2))(M)
M = Conv2D(256, (3, 3), activation='relu')(M)
M = MaxPooling2D()(M)

# Flatten the feature maps so they can feed the dense heads.
M = Flatten()(M)

# Two independent heads read the same flattened features:
#   gender: dense1 -> dropout1 -> genderOutput
#   age:    dense2 -> dense3 -> dropout2 -> ageOutput
dense1 = Dense(256, activation='relu')(M)
dense2 = Dense(256, activation='relu')(M)
dense3 = Dense(128, activation='relu')(dense2)

dropout1 = Dropout(0.4)(dense1)
dropout2 = Dropout(0.4)(dense3)

# Single sigmoid unit for the binary gender label; single unit for the age.
# NOTE(review): ageOutput applies ReLU on the final unit, so predictions are
# never negative, but a negative pre-activation yields 0 with zero gradient.
# Confirm this is preferred over a linear output.
outputGender = Dense(1, activation='sigmoid', name="genderOutput")(dropout1)
outputAge = Dense(1, activation='relu', name="ageOutput")(dropout2)

# The output order [gender, age] is the order in which targets are passed to
# fit() and evaluate().
model = Model(inputs=[inputSize], outputs=[outputGender, outputAge])

model.summary()

In [22]:
# Binary cross-entropy / accuracy are listed first and MAE / MAE second.
# NOTE(review): the lists are presumably paired by position with the model
# outputs [genderOutput, ageOutput]; the history keys and the five-value
# evaluate() unpack used later are consistent with that. Confirm against the
# installed Keras version.
model.compile(
    optimizer='adam',
    loss=['binary_crossentropy', 'mae'],
    metrics=['accuracy', 'mae'],
)

## Huấn luyện mô hình

In [None]:
# Train both heads jointly for 10 epochs, holding out 20% of the training data
# for validation. Note that `modelCNN` holds the object returned by fit()
# (its `.history` dict is plotted below), not the model itself.
modelCNN = model.fit(
    x=xTrain,
    y=[yGender, yAge],
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

## Xây dựng biểu đồ hàm mất mát

In [None]:
# Gender-head loss per epoch: training curve vs. validation curve.
fig, ax = plt.subplots()
ax.plot(modelCNN.history['genderOutput_loss'])
ax.plot(modelCNN.history['val_genderOutput_loss'])
ax.set_title('Gender loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Age-head loss per epoch: training curve vs. validation curve.
fig, ax = plt.subplots()
ax.plot(modelCNN.history['ageOutput_loss'])
ax.plot(modelCNN.history['val_ageOutput_loss'])
ax.set_title('Age loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

## Biểu đồ độ chính xác

In [None]:
# Gender-head accuracy per epoch: training curve vs. validation curve.
fig, ax = plt.subplots()
ax.plot(modelCNN.history['genderOutput_accuracy'])
ax.plot(modelCNN.history['val_genderOutput_accuracy'])
ax.set_title('Gender accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

In [53]:
import random

## Dùng mô hình dự đoán

In [None]:
# Pick a random training sample. The upper bound comes from the data itself:
# a hard-coded 10000 raises IndexError when there are fewer samples and never
# selects samples beyond that index when there are more.
index = random.randrange(len(xTrain))
print("Original: Gender = ", genderDict[yGender[index]]," Age = ", yAge[index])

# The model has two outputs, so predict() returns one array per output
# (gender first, then age), each with a single row for this one-image batch.
pred = model.predict(xTrain[index].reshape(1, 128, 128, 3))
# Convert to a plain Python float before rounding so round() yields an int
# regardless of how the NumPy scalar type implements rounding.
pred_gender = genderDict[round(float(pred[0][0][0]))]
pred_age = round(float(pred[1][0][0]))

print("Prediction: Gender = ", pred_gender," Age = ", pred_age)
plt.imshow(xTrain[index].reshape(128,128, 3))

## Xử lý dữ liệu của bộ dữ liệu kiểm thử

In [None]:
# Load every test image, resize it to 128x128 with LANCZOS resampling and write
# it straight into a pre-allocated uint8 buffer, so a Python list of per-image
# arrays and the stacked copy never coexist in memory.
xTestRaw = np.empty((len(testDataset), 128, 128, 3), dtype=np.uint8)
for i, image in enumerate(tqdm(testDataset['image'])):
    img = load_img(f"{datasetPath}/{image}")
    img = img.resize((128, 128), Image.Resampling.LANCZOS)
    # Assignment fails loudly if an image does not decode to (128, 128, 3).
    xTestRaw[i] = np.asarray(img)

# Divide by 255 as float32, which needs half the memory of the float64 array
# that dividing the uint8 data directly would produce.
xTest = xTestRaw.astype(np.float32) / 255
del xTestRaw  # the raw uint8 copy is no longer needed

# Test targets, in the same [gender, age] order as the model outputs.
yGenderTest = np.array(testDataset["gender"])
yAgeTest = np.array(testDataset["age"])

## Tính toán độ chính xác và hàm mất mát trên bộ dữ liệu kiểm thử

In [None]:
# evaluate() is expected to return five scalars here: the total loss, the loss
# of each output, then the metric of each output. The unpack below raises if
# the installed Keras version returns a different number of values.
testResults = model.evaluate(xTest, [yGenderTest, yAgeTest], verbose=0)
(
    test_loss,
    test_gender_loss,
    test_age_loss,
    test_gender_accuracy,
    test_age_mae,
) = testResults

print(f"Test Gender Loss: {test_gender_loss}")
print(f"Test Age Loss: {test_age_loss}")
print(f"Test Gender Accuracy: {test_gender_accuracy}")
print(f"Test Age MAE: {test_age_mae}")

## Lưu mô hình

In [None]:
# Write the trained model to "model.keras" in the current working directory.
model.save("model.keras")