In [1]:
# Colab-only: mount Google Drive at /content/drive. Later cells read the
# dataset pickles and the project folder from /content/drive/MyDrive/facial_fuse.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import shutil
import os

# Copy the project folder from the mounted Drive into /content/facial_fuse.
src_dir = '/content/drive/MyDrive/facial_fuse/label-distribution-learning-fer-tf'
dst_dir = '/content/facial_fuse/label-distribution-learning-fer-tf'

# Fail early with an actionable message when the source is missing
# (for example because the Drive mount cell was not run).
if not os.path.isdir(src_dir):
    raise FileNotFoundError(f"Source folder not found (is Drive mounted?): {src_dir}")

os.makedirs(os.path.dirname(dst_dir), exist_ok=True)

# dirs_exist_ok=True allows this cell to be re-run when dst_dir already exists.
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)

# Report the variable that was actually used so the message cannot drift from dst_dir.
print(f"Folder copied to {dst_dir}")

In [None]:
import pickle
import numpy as np
from PIL import Image

# Load both pickled splits and print a quick overview of the first training sample.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('/content/drive/MyDrive/facial_fuse/train_data.pkl', 'rb') as f:
    train_data = pickle.load(f)

with open('/content/drive/MyDrive/facial_fuse/test_data.pkl', 'rb') as f:
    test_data = pickle.load(f)

print(f" Loaded {len(train_data)} training samples.")
print(f"Loaded {len(test_data)} testing samples.")
print("🔍 First train sample keys:", train_data[0].keys())

sample = train_data[0]
print("\n Sample Structure:")
for k, v in sample.items():
    print(f"{k}: {v}")

# sample['npy'] is passed to np.load, i.e. it is a path to a saved NumPy array.
# (numpy is already imported at the top of this cell; no re-import needed.)
landmark = np.load(sample['npy'])
print(f"\n Landmark shape: {landmark.shape}")

# Open the image inside a context manager so the file handle is closed
# deterministically; convert() returns an independent in-memory copy.
with Image.open(sample['img']) as img_file:
    img = img_file.convert('RGB')
img_array = np.array(img)
print(f"Image shape: {img_array.shape}")  #(H, W, 3)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses, applications
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import pickle
from tensorflow.keras.utils import to_categorical

# Read the train and test sample lists from their pickle files on Drive.
train_pkl = '/content/drive/MyDrive/facial_fuse/train_data.pkl'
test_pkl = '/content/drive/MyDrive/facial_fuse/test_data.pkl'

with open(train_pkl, 'rb') as train_file:
    train_data = pickle.load(train_file)

with open(test_pkl, 'rb') as test_file:
    test_data = pickle.load(test_file)

def preprocess_sample(sample):
    """Turn one dataset record into (image, landmark vector, label).

    Args:
        sample: mapping with an 'img' path, an 'npy' path and a 'label' entry.

    Returns:
        A tuple of the 100x100 image array rescaled with /255 then
        (x - 0.5) / 0.5 (i.e. [-1, 1] assuming 0-255 pixel values), the
        landmark array flattened to 1-D float32, and the label unchanged.
    """
    pixels = img_to_array(load_img(sample['img'], target_size=(100, 100)))
    pixels = pixels / 255.0
    pixels = (pixels - 0.5) / 0.5

    flat_landmark = np.load(sample['npy']).astype(np.float32).reshape(-1)
    return pixels, flat_landmark, sample['label']

def _stack_samples(samples):
    """Preprocess every sample and return (images, landmarks, one-hot labels) arrays."""
    images, landmarks, labels = zip(*[preprocess_sample(s) for s in samples])
    return np.array(images), np.array(landmarks), to_categorical(labels, 7)


# Same conversion for both splits: stacked inputs plus 7-class one-hot targets.
X_img_train, X_lmk_train, y_train = _stack_samples(train_data)
X_img_test, X_lmk_test, y_test = _stack_samples(test_data)

def build_early_fusion_model(input_shape=(100, 100, 3), landmark_dim=1434,
                             num_classes=7, trainable_tail=10):
    """Build the image + landmark fusion classifier.

    An ImageNet-initialised ResNet50 (global-average pooled, no top) encodes
    the image; a two-layer MLP encodes the flattened landmarks; both vectors
    are concatenated and classified with a softmax head.

    Args:
        input_shape: image input shape passed to ResNet50 and the image Input.
        landmark_dim: length of the flattened landmark vector.
        num_classes: number of softmax outputs.
        trainable_tail: how many of the last ResNet50 layers stay trainable;
            all earlier layers are frozen. 0 freezes the whole backbone.

    Returns:
        An uncompiled Keras Model taking [image, landmarks] as inputs.
    """
    base_model = applications.ResNet50(include_top=False, input_shape=input_shape,
                                       pooling='avg', weights='imagenet')

    # Keep the backbone container trainable and freeze the leading layers
    # individually. Setting base_model.trainable = False and then re-enabling
    # child layers is not reliable: in tf.keras 2.x a model whose own
    # `trainable` is False reports no trainable weights regardless of the
    # flags on its children, so the intended fine-tuning would not happen.
    base_model.trainable = True
    freeze_until = max(len(base_model.layers) - max(trainable_tail, 0), 0)
    for layer in base_model.layers[:freeze_until]:
        layer.trainable = False

    img_input = layers.Input(shape=input_shape)
    x_img = base_model(img_input)

    lmk_input = layers.Input(shape=(landmark_dim,))
    x_lmk = layers.Dense(128, activation='relu')(lmk_input)
    x_lmk = layers.Dense(64, activation='relu')(x_lmk)

    # Fuse the pooled image features with the landmark embedding.
    x = layers.Concatenate()([x_img, x_lmk])
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs=[img_input, lmk_input], outputs=output)

# Build, compile, train and save the fusion model.
model = build_early_fusion_model()

adam = optimizers.Adam(1e-4)
smoothed_ce = losses.CategoricalCrossentropy(label_smoothing=0.1)
model.compile(optimizer=adam, loss=smoothed_ce, metrics=['accuracy'])

# NOTE: the test arrays are used as validation data during training.
train_inputs = [X_img_train, X_lmk_train]
val_inputs = [X_img_test, X_lmk_test]
model.fit(
    train_inputs, y_train,
    validation_data=(val_inputs, y_test),
    epochs=10,
    batch_size=64,
)

model.save("early_fusion_full.h5")

In [18]:
# Colab-only: trigger a browser download of the model file saved by the training cell.
from google.colab import files
files.download("early_fusion_full.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
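# NOTE: this whole cell is commented out, yet it is the only code in the notebook
# that writes /content/mlp_landmark_backbone_midfusion.h5 (see save_path below),
# which the mid-fusion cell later loads with load_model. On a fresh runtime that
# file must be produced some other way, or the load will fail.
# import os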
# import pickle
# import numpy as np
# from PIL import Image
# import tensorflow as tf
# from tensorflow.keras import layers, models, callbacks
# from sklearn.model_selection import train_test_split
# def normalize_landmarks(landmark):
#     mean = np.mean(landmark, axis=0)
#     std = np.std(landmark, axis=0)
#     return (landmark - mean) / (std + 1e-6)

# landmark_dim = (478, 3)
# input_dim = np.prod(landmark_dim)
# embedding_shape = (7, 7, 64)
# embedding_dim = np.prod(embedding_shape)
# num_classes = 7
# save_path = "/content/mlp_landmark_backbone_midfusion.h5"

# def load_pickle_data(path, num_classes=7):
#     with open(path, 'rb') as f:
#         data = pickle.load(f)
#     landmarks, labels = [], []
#     for sample in data:
#         try:
#             landmark = np.load(sample['npy'])
#             if landmark.shape != landmark_dim:
#                 continue
#             landmark += np.random.normal(0, 0.5, landmark.shape)
#             landmarks.append(landmark.flatten())
#             labels.append(int(sample['label']))
#         except:
#             continue
#     X = np.array(landmarks)
#     y = tf.keras.utils.to_categorical(labels, num_classes)
#     return X, y

# X_train_full, y_train_full = load_pickle_data('/content/drive/MyDrive/facial_fuse/train_data.pkl')
# X_test, y_test = load_pickle_data('/content/drive/MyDrive/facial_fuse/test_data.pkl')

# X_train, X_val, y_train, y_val = train_test_split(
#     X_train_full, y_train_full, test_size=0.2, stratify=y_train_full.argmax(axis=1), random_state=42
# )

# def build_improved_mlp_backbone(feature_dim):
#     model = models.Sequential([
#         layers.Input(shape=(478, 3)),  # preserve spatial structure
#         layers.Dense(512, kernel_initializer='he_normal'),
#         layers.BatchNormalization(),
#         layers.ReLU(),
#         layers.Dropout(0.6),

#         layers.Dense(256, kernel_initializer='he_normal'),
#         layers.BatchNormalization(),
#         layers.ReLU(),
#         layers.Dropout(0.6),

#         layers.Dense(feature_dim, kernel_initializer='he_normal'),
#         layers.BatchNormalization(),
#         layers.ReLU(),
#         layers.Dropout(0.6),
#     ])
#     return model

# def build_classifier(backbone, embedding_shape, num_classes):
#     inputs = tf.keras.Input(shape=(input_dim,))
#     x = backbone(inputs)
#     x = layers.GlobalAveragePooling2D()(x)
#     outputs = layers.Dense(num_classes, activation='softmax')(x)
#     return tf.keras.Model(inputs, outputs)

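# NOTE: build_midfusion_backbone is not defined anywhere in this cell (only
# build_improved_mlp_backbone is); restore its definition before re-enabling this code.
# backbone = build_midfusion_backbone(input_dim, embedding_shape)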
# classifier = build_classifier(backbone, embedding_shape, num_classes)

# classifier.compile(
#     optimizer='adam',
#     loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
#     metrics=['accuracy']
# )

# cb = [
#     callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
#     callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, verbose=1)
# ]

# classifier.fit(
#     X_train, y_train,
#     validation_data=(X_val, y_val),
#     epochs=50,
#     batch_size=64,
#     callbacks=cb,
#     verbose=2
# )

# # --- Save Backbone Only ---
# backbone.save(save_path)
# print(f"Mid-Fusion Landmark Backbone saved to: {save_path}")


In [None]:
import tensorflow as tf
import numpy as np
from PIL import Image
from tqdm import tqdm

# Load the pretrained network from the project folder copied to /content earlier.
# NOTE(review): this rebinds the global `model`, replacing the fusion model that an
# earlier cell built and trained under the same name.
model = tf.keras.models.load_model('/content/facial_fuse/label-distribution-learning-fer-tf/pretrained/resnet50.h5')

In [None]:
# Print the layer listing of the loaded model (the next cell looks up a layer by name).
model.summary()


In [None]:
from tensorflow.keras.models import Model

# Sub-model that maps the loaded network's input to the output of its layer named "680".
feature_layer = model.get_layer("680")
feature_extractor = Model(inputs=model.input, outputs=feature_layer.output)

In [None]:
# Run the feature extractor on the first training image as a smoke test.
# Relies on `train_data` loaded by an earlier cell.
sample_img_path = train_data[0]['img']

img = Image.open(sample_img_path).convert('RGB').resize((112, 112))

# float32 pixels, reordered from (H, W, C) to channels-first.
img_array = np.array(img).astype(np.float32).transpose(2, 0, 1)  #(3, 112, 112)

# Prepend the batch axis.
x = img_array[np.newaxis, ...]  #(1, 3, 112, 112)

feature_vector = feature_extractor.predict(x)  # expected (1, 2048) per the original note

print("ResNet Feature Shape:", feature_vector.shape)
print("First 10 Feature Values:\n", feature_vector[0][:10])

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import (Input, Concatenate, GlobalAveragePooling2D,
                                     Dense, Dropout, Conv2D, BatchNormalization, Activation)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import numpy as np
import pickle
from PIL import Image

# Load the saved landmark backbone (compile=False: no optimizer/loss is restored)
# and freeze it so its weights are not updated when the fused model is trained.
# NOTE(review): no active cell in this notebook writes this .h5 file; it must already exist.
landmark_backbone = load_model('/content/mlp_landmark_backbone_midfusion.h5', compile=False)
landmark_backbone.trainable = False

def load_data(pkl_path):
    """Unpickle and return the object stored at ``pkl_path``.

    Note: pickle can execute arbitrary code, so only use this on trusted files.
    """
    with open(pkl_path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

def process_image(img_path):
    """Load an image as a 224x224 RGB float32 array scaled by 1/255.

    Args:
        img_path: path of the image file to open.

    Returns:
        Array of shape (224, 224, 3), dtype float32.

    float32 is used instead of NumPy's default float64 result: it halves the
    memory of the stacked dataset, and float32 is the dtype the model consumes.
    """
    # The context manager closes the underlying file deterministically;
    # convert()/resize() return in-memory images that outlive it.
    with Image.open(img_path) as source:
        resized = source.convert('RGB').resize((224, 224))
    # NOTE(review): the ImageNet ResNet50 these images are fed to is documented to
    # expect keras.applications.resnet50.preprocess_input rather than 1/255
    # scaling - confirm the current scaling is intentional.
    return np.asarray(resized, dtype=np.float32) / 255.0

def process_landmark(npy_path):
    """Load the array saved at ``npy_path`` and return it flattened to 1-D."""
    return np.ravel(np.load(npy_path))

# Load both dataset splits from Drive via load_data (unpickles each file).
train_data = load_data('/content/drive/MyDrive/facial_fuse/train_data.pkl')
test_data = load_data('/content/drive/MyDrive/facial_fuse/test_data.pkl')

def build_dataset(data, num_classes=7):
    """Convert sample records into model-ready arrays.

    Args:
        data: iterable of records with 'img', 'npy' and 'label' entries.
        num_classes: width of the one-hot label encoding.

    Returns:
        (images, landmarks, one_hot_labels) where the first two are the
        stacked outputs of process_image / process_landmark.
    """
    # Single pass over the records, image first then landmark, as one row each.
    rows = [
        (process_image(record['img']), process_landmark(record['npy']), record['label'])
        for record in data
    ]
    images = np.array([row[0] for row in rows])
    landmarks = np.array([row[1] for row in rows])
    one_hot = tf.keras.utils.to_categorical([row[2] for row in rows], num_classes)
    return images, landmarks, one_hot

# ---- Data ----
x_train_img, x_train_lm, y_train = build_dataset(train_data)
x_test_img, x_test_lm, y_test = build_dataset(test_data)


def _conv_bn_relu(tensor, filters):
    """Apply a 3x3 same-padded convolution, then batch normalization, then ReLU."""
    tensor = Conv2D(filters, kernel_size=3, padding='same')(tensor)
    tensor = BatchNormalization()(tensor)
    return Activation('relu')(tensor)


# ---- Image branch: frozen ImageNet ResNet50, channels reduced by a 1x1 conv ----
img_input = Input(shape=(224, 224, 3), name='img_input')
resnet_base = ResNet50(include_top=False, input_tensor=img_input, weights='imagenet')
resnet_base.trainable = False
resnet_out = Conv2D(512, kernel_size=1, activation='relu')(resnet_base.output)

# ---- Landmark branch: frozen backbone loaded earlier in this cell ----
lm_input = Input(shape=(1434,), name='landmark_input')
lm_proj = landmark_backbone(lm_input)  # original note: (B, 7, 7, 64) - TODO confirm

# ---- Fusion along the channel axis ----
fusion = Concatenate(axis=-1)([resnet_out, lm_proj])

x = fusion
for n_filters in (256, 128):
    x = _conv_bn_relu(x, n_filters)

# ---- Classification head ----
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
output = Dense(7, activation='softmax')(x)

mid_fusion_model = Model(inputs=[img_input, lm_input], outputs=output)
mid_fusion_model.compile(
    optimizer=Adam(1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

mid_fusion_model.summary()

# ---- Training ----
lr_on_plateau = ReduceLROnPlateau(patience=3, factor=0.5, verbose=1)
best_checkpoint = ModelCheckpoint(
    'mid_fusion_classifier.h5',
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1,
)
callbacks = [lr_on_plateau, best_checkpoint]

# NOTE: the test arrays double as validation data, so the checkpoint is
# selected on test accuracy.
mid_fusion_model.fit(
    [x_train_img, x_train_lm], y_train,
    validation_data=([x_test_img, x_test_lm], y_test),
    epochs=50,
    batch_size=32,
    callbacks=callbacks,
)
