# Guess the age 2022-2023
Group 9:
*   Antonello Avella
*   Eugenio Carpentieri
*   Valerio Costantino
*   Claudio De Pisapia


In [None]:
%pip install opencv-python scipy tensorflow keras_applications scikit-learn

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive so that files stored there can be
# read through ordinary filesystem paths.
drive.mount('/content/drive')

In [None]:
import cv2
import numpy as np

# In case of ModuleNotFoundError: No module named 'keras.engine.topology'
# Change from keras.engine.topology import get_source_inputs 
# to from keras.utils.layer_utils import get_source_inputs
# in file keras_vggface/models.py
#from keras_vggface.vggface import VGGFace
#from keras_vggface.utils import preprocess_input

from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint
from keras.models import Model
from keras.layers import Input, Dense, Concatenate, Flatten

from keras.utils import plot_model
import tensorflow as tf
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import keras
from deepface import DeepFace
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from google.colab import files

# Dataset loading

In [None]:
# DOWNLOAD THE FULL DATASET FROM KAGGLE
# To execute properly this cell you must be in "/content" folder.
# Moreover, inside "/content" folder ther must be a file named 'kaggle.json' (thanks to which the download can start).
# At the end of download, you can find the full dataset in "/content/dataset/training_caip_contest".

!pip install -q kaggle
!rm -r ~/.kaggle 
!mkdir ~/.kaggle
!mkdir dataset
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d robertofalcone/gta2022dataset 
!unzip gta2022dataset.zip -d dataset 

# You should also upload 'training_caip_contest.csv' in "/content/dataset/" in order to execute the next cells.

# Train and validation split

In [None]:
df = pd.read_csv('./dataset/training_caip_contest.csv', names=['path', 'age'])

# Tag every sample with its age-decade group:
# '1' for age <= 10, '2' for (10, 20], ..., '7' for (60, 70], '8' for age > 70.
ages = df['age']
decade_limits = [10, 20, 30, 40, 50, 60, 70]
df.loc[ages.le(decade_limits[0]), 'group'] = '1'
for group_id, (lower, upper) in enumerate(zip(decade_limits, decade_limits[1:]), start=2):
  df.loc[ages.gt(lower) & ages.le(upper), 'group'] = str(group_id)
df.loc[ages.gt(decade_limits[-1]), 'group'] = '8'

# Bin edges / display labels used for the per-decade sample counts printed below.
bins = [-1, 10, 20, 30, 40, 50, 60, 70, np.inf]
labels=['0-10','11-20','21-30', '31-40', '41-50', '51-60', '61-70', '70+']


def _count_per_age_bin(frame):
  """Return a frame with the number of rows of `frame` in each age bin (`bins`/`labels`)."""
  binned_age = pd.cut(frame['age'], bins=bins, labels=labels)
  return frame.groupby(binned_age).size().reset_index(name='count')


# Age distribution of the full dataset.
count_groups = _count_per_age_bin(df)
print(count_groups)

n_group_age = 8

# Validation set: 20% of each age group, sampled with a fixed seed.
val_df = df.groupby('group').sample(frac=0.2, random_state=1)

# Training set: rows of df that were not sampled into val_df. The outer merge
# with indicator=True marks those rows as 'left_only'.
merged = pd.merge(df, val_df, how='outer', indicator=True)
is_train_row = merged['_merge'] == 'left_only'
train_df = merged.loc[is_train_row].drop(columns='_merge')

# Age distribution of the validation set.
count_groups = _count_per_age_bin(val_df)

n_train_samples = len(train_df)
n_val_samples = len(val_df)

print(count_groups)

## Feature Extraction

In [None]:
# Location of the saved age-estimation model on the mounted Google Drive.
xception_model_path = '/content/drive/MyDrive/finalProject_AV/GTA_CAIP_Contest_Code/xception09_final'

# Load the model without compiling it, then freeze all of its weights.
final_model = keras.models.load_model(xception_model_path, compile=False)
final_model.trainable = False

final_model.summary()

Model: "Final_output"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 299, 299, 3)]     0         
                                                                 
 xception (Functional)       (None, 10, 10, 2048)      20861480  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_2 (Dense)             (None, 128)               262272    
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 21,123,881
Trainable params: 0
Non-trainable params: 21,123,881
__________________________________________

In [None]:
# Build a truncated copy of the loaded model: same input, but the output is
# taken from the layer at index 2, so the layers after it are left out.
extractor_input = final_model.layers[0].input
extractor_output = final_model.layers[2].output
feature_extractor = keras.Model(inputs=extractor_input, outputs=extractor_output)

In [None]:
# Print the layer summary to check where the truncated model ends and what
# output shape it produces.
feature_extractor.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 299, 299, 3)]     0         
                                                                 
 xception (Functional)       (None, 10, 10, 2048)      20861480  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
Total params: 20,861,480
Trainable params: 0
Non-trainable params: 20,861,480
_________________________________________________________________


In [None]:
batch_size_gen = 128
img_width=299
img_height=299

# Only rescale pixel values to [0, 1]; no augmentation is applied.
datagen_train=ImageDataGenerator(rescale=1./255.)
datagen_val=ImageDataGenerator(rescale=1./255.)


def _make_age_generator(datagen, dataframe):
  """Create an unshuffled (image batch, age batch) iterator over `dataframe`.

  Args:
    datagen: the ImageDataGenerator that loads and rescales the images.
    dataframe: frame with a 'path' column (image file name relative to the
      dataset directory) and an 'age' column (regression target).

  Returns:
    The iterator returned by `datagen.flow_from_dataframe`.
  """
  return datagen.flow_from_dataframe(
    dataframe=dataframe,
    directory="/content/dataset/training_caip_contest/",
    x_col="path",
    y_col="age",
    batch_size=batch_size_gen,
    # shuffle=False keeps batches in dataframe order.
    shuffle=False,
    # "other" passes the y_col values through unchanged as targets.
    class_mode="other",
    # Keras expects (height, width); the original passed (width, height),
    # which only worked because both sides are 299.
    target_size=(img_height, img_width))


train_generator = _make_age_generator(datagen_train, train_df)
valid_generator = _make_age_generator(datagen_val, val_df)


Found 460058 validated image filenames.
Found 115015 validated image filenames.


In [None]:
def _extract_features(generator, extractor, feature_dim=2048, log_every=100):
  """Run `extractor` over exactly one pass of `generator`.

  Args:
    generator: Keras data iterator yielding (images, ages) batches.
    extractor: model whose `predict_on_batch` maps an image batch to a
      (batch, feature_dim) array.
    feature_dim: width of the feature vectors produced by `extractor`.
    log_every: print a progress line every `log_every` batches (0 disables).

  Returns:
    (features, targets): float64 arrays of shape (n_samples, feature_dim) and
    (n_samples,), where targets are the generator's ages divided by 100.
  """
  n_samples = generator.n
  # len(generator) is the number of batches in one pass. The hard-coded
  # `count // batch_size + 1` fetched an extra wrapped-around batch whenever
  # the count was an exact multiple of the batch size.
  n_batches = len(generator)

  # Preallocate once: np.append copies the whole accumulated array on every
  # call, which is quadratic in the number of batches. float64 is kept so the
  # arrays have the same dtype as the ones the np.append version produced.
  features = np.empty((n_samples, feature_dim))
  targets = np.empty((n_samples,))

  # Start from the first batch even if the generator was partly consumed by
  # an earlier (interrupted) run of this cell.
  generator.reset()

  filled = 0
  for batch_idx in range(n_batches):
    x, y = next(generator)
    batch_features = extractor.predict_on_batch(x)
    stop = filled + len(batch_features)
    features[filled:stop] = batch_features
    # Ages are divided by 100 to obtain the regression targets.
    targets[filled:stop] = y / 100
    filled = stop
    if log_every and (batch_idx + 1) % log_every == 0:
      print(f"{batch_idx + 1}/{n_batches}")

  return features[:filled], targets[:filled]


x_train, y_train = _extract_features(train_generator, feature_extractor)
x_val, y_val = _extract_features(valid_generator, feature_extractor)

In [None]:
# Sanity check: rows should equal the number of validation samples and
# columns the feature dimension.
print(x_val.shape)

(115015, 2048)


In [None]:
# Persist the extracted features and targets as .npy files, one file per array.
arrays_by_path = {
  '/content/xception_x_train.npy': x_train,
  '/content/xception_y_train.npy': y_train,
  '/content/xception_x_val.npy': x_val,
  '/content/xception_y_val.npy': y_val,
}
for npy_path, array in arrays_by_path.items():
  np.save(npy_path, array)