In [54]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import torch
from transformers import DeiTForImageClassification
from scipy.special import softmax
from sklearn.metrics import accuracy_score
from transformers import DeiTForImageClassification, DeiTFeatureExtractor
import torchvision.transforms as transforms
import safetensors.torch
import os
import cv2
import dlib
import torch
from PIL import Image
from mtcnn import MTCNN

In [55]:
# 모델 디렉토리 설정
eff0 = load_model("model/efficientnet_face_emotion_new_colab.h5") #efficientnet b0, tensorflow 2.15.0 ver
eff7 = load_model("model/efficientnet_face_emotion_b7_new.h5") #efficientnet b7
vit_model = tf.saved_model.load("model/ViT_b16") #ViT 모델

feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224') #사전학습 불러오기 
# 데이터 변환 설정
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

model = DeiTForImageClassification.from_pretrained('facebook/deit-base-distilled-patch16-224', num_labels=4)
state_dict = safetensors.torch.load_file('model/model.safetensors') #DeiT 모델 불러오기
model.load_state_dict(state_dict)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DeiTForImageClassification(
  (deit): DeiTModel(
    (embeddings): DeiTEmbeddings(
      (patch_embeddings): DeiTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): DeiTEncoder(
      (layer): ModuleList(
        (0-11): 12 x DeiTLayer(
          (attention): DeiTAttention(
            (attention): DeiTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): DeiTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): DeiTIntermediate(
            (dense): Linear(in

In [56]:
models_tf = [eff0, eff7, vit_model] #텐서플로우
models_pt = [model] #파이토치

labels = ['Anger', 'Happy', 'Panic', 'Sadness', 'Neutral']
label_to_index = {label: i for i, label in enumerate(labels)}

In [57]:
def cropface_dlib(image, padding=100):
    detector = dlib.get_frontal_face_detector()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    for face_index, face in enumerate(faces):
        x, y, w, h = face.left(), face.top(), face.width(), face.height()

        x -= padding
        y -= padding
        w += padding * 2
        h += padding * 2

        x = max(0, x)
        y = max(0, y)
        w = min(image.shape[1] - x, w)
        h = min(image.shape[0] - y, h)

        face_image = image[y:y+h, x:x+w]

    return face_image

def extract_faceinfo_mtcnn(img):
    detector = MTCNN()
    faces = detector.detect_faces(img)
    if len(faces) == 0:
        return None #감지되지 않으면 None처리

    face = faces[0]
    bounding_box = face['box']
    keypoints = face['keypoints']
    face_info = {
        'bounding_box': bounding_box,
        'keypoints': keypoints
    }

    return face_info

def calculate_angle(face_info):
    left_eye = face_info['keypoints']['left_eye']
    right_eye = face_info['keypoints']['right_eye']

    return np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]) * 180 / np.pi

def rotate_image(image, img_info):
    angle = calculate_angle(img_info)
    height, width = image.shape[:2]
    center = (width / 2, height / 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))
    return rotated_image

def normalize_face(image, face_info, output_size=(224, 224)):
    landmarks = face_info['keypoints']
    left_eye = np.array(landmarks['left_eye'])
    right_eye = np.array(landmarks['right_eye'])
    nose_tip = np.array(landmarks['nose'])

    eye_distance = np.linalg.norm(left_eye - right_eye)
    desired_eye_distance = 0.3 * output_size[0]
    scale = desired_eye_distance / eye_distance

    M = np.array([[scale, 0, 0], [0, scale, 0]])
    scaled_image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

    nose_center = nose_tip * scale
    offset_x = (output_size[0] / 2) - nose_center[0]
    offset_y = (output_size[1] / 2) - nose_center[1]
    M = np.array([[1, 0, offset_x], [0, 1, offset_y]])
    normalized_image = cv2.warpAffine(scaled_image, M, output_size)

    return normalized_image

In [58]:
# 예측 함수 정의
def predict_ensemble_tf(models, input):
    votes = []
    confidences = []

    for model in models:
        output = model(input, training=False)
        probabilities = tf.nn.softmax(output, axis=1)
        confidence = tf.reduce_max(probabilities, axis=1)
        predicted = tf.argmax(probabilities, axis=1)
        votes.append(predicted.numpy()[0])
        confidences.append(confidence.numpy()[0])
    
    return votes, confidences

In [59]:
def predict_ensemble_pt(models, input):
    votes = []
    confidences = []

    for model in models:
        model.eval()
        with torch.no_grad():
            output = model(input.to(device)).logits
            probabilities = softmax(output.cpu().numpy(), axis=1)
            confidence = np.max(probabilities, axis=1)
            predicted = np.argmax(probabilities, axis=1)
            votes.append(predicted[0])
            confidences.append(confidence[0])
    
    return votes, confidences

In [89]:
def vote_and_classify(votes, confidences, threshold=0.80):
    vote_count = {}
    for vote in votes:
        if vote in vote_count:
            vote_count[vote] += 1
        else:
            vote_count[vote] = 1
    
    max_vote = max(vote_count, key=vote_count.get)
    max_count = vote_count[max_vote]
    vote_prob = max_count / len(votes)
    
    if max_count >= 2: #모델 4개중 2개가 같다고 판단
        return max_vote, vote_prob
    elif all(conf < threshold for conf in confidences): #모두가 임계점 이하
        return 4, vote_prob  # Neutral
    else:
        return 4, vote_prob  # Neutral

In [90]:
# 데이터 로드
data_dir = r'H:\dataset\Validation'
test_images = []

In [93]:
for label in labels:
    folder_path = os.path.join(data_dir, label)
    if not os.path.isdir(folder_path):
        continue
    count = 0
    for filename in os.listdir(folder_path):
        if count >= 10:
            break
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(folder_path, filename)
            if not os.path.isfile(image_path):
                print(f"Error: File '{image_path}' does not exist.")
                continue
            test_images.append((image_path, label_to_index[label]))
            count += 1

In [94]:
predictions = []
true_labels = []
confidences = []

In [95]:
for image_path, true_label in test_images:
    image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR) #파일 경로에 한글이 있어서 변환
    if image is None:
        print(f"Error: Could not read image from '{image_path}'")
        continue
    
    # 얼굴 전처리
    image = cropface_dlib(image)
    try:
        face_info = extract_faceinfo_mtcnn(image)
    except Exception as e:
        print(f"Error: {str(e)} for image '{image_path}'")
        continue

    image = rotate_image(image, face_info)
    image = normalize_face(image, face_info)
    image = cv2.resize(image, (224, 224))

    # TensorFlow 모델 예측
    image_tf = image.astype('float32') / 255.0
    image_tf = np.expand_dims(image_tf, axis=0)
    votes_tf, confidences_tf = predict_ensemble_tf(models_tf, image_tf)

    # PyTorch 모델 예측
    img_array = image.astype('float32') / 255.0
    img_array = np.transpose(img_array, (2, 0, 1))  # 채널 순서 변경 (HWC -> CHW)
    img_tensor = torch.tensor(img_array).unsqueeze(0).to(device)
    votes_pt, confidences_pt = predict_ensemble_pt(models_pt, img_tensor)
    
    votes = votes_tf + votes_pt
    confidences_all = confidences_tf + confidences_pt

    predicted, vote_prob = vote_and_classify(votes, confidences_all)
    
    predictions.append(predicted)
    true_labels.append(true_label)
    confidences.append(vote_prob)

Error: local variable 'face_info' referenced before assignment for image 'H:\dataset\Validation\Panic\006b56dc2f8cda2361e1b01b2496d6f352dd5b1790f0a9b0bfcbe540b292247d_여_20_당황_공공시설&종교&의료시설_20210130215042-001-008.jpg'
Error: local variable 'face_info' referenced before assignment for image 'H:\dataset\Validation\Neutral\006b56dc2f8cda2361e1b01b2496d6f352dd5b1790f0a9b0bfcbe540b292247d_여_20_중립_상업시설&점포&시장_20210130220011-003-006.jpg'
Error: local variable 'face_info' referenced before assignment for image 'H:\dataset\Validation\Anger\007f299267a9d6a145f627e9993aeae4e8323652785a2f4d1b896f6489d3acc0_남_30_분노_공공시설&종교&의료시설_20201202203951-001-003.jpg'
Error: local variable 'face_info' referenced before assignment for image 'H:\dataset\Validation\Anger\007f299267a9d6a145f627e9993aeae4e8323652785a2f4d1b896f6489d3acc0_남_30_분노_공공시설&종교&의료시설_20201202203951-001-006.jpg'
Error: local variable 'face_info' referenced before assignment for image 'H:\dataset\Validation\Happy\007f299267a9d6a145f627e9993aeae4e8

In [96]:
#정확도 출력
accuracy = accuracy_score(true_labels, predictions)
print(f'Voting Ensemble Accuracy: {accuracy * 100:.2f}%')

Voting Ensemble Accuracy: 51.02%


In [97]:
emotion_confidence = {label: [] for label in labels}
emotion_accuracy = {label: [] for label in labels}
for i, pred in enumerate(predictions):
    emotion_confidence[labels[pred]].append(confidences[i])
    if true_labels[i] == pred:
        emotion_accuracy[labels[pred]].append(1)
    else:
        emotion_accuracy[labels[pred]].append(0)

for emotion, confs in emotion_confidence.items():
    avg_conf = np.mean(confs) if confs else 0
    avg_acc = np.mean(emotion_accuracy[emotion]) if emotion_accuracy[emotion] else 0
    print(f'Emotion: {emotion}, Average Confidence: {avg_conf:.2f}, Accuracy: {avg_acc:.2f}')

Emotion: Anger, Average Confidence: 0.71, Accuracy: 0.71
Emotion: Happy, Average Confidence: 0.96, Accuracy: 0.86
Emotion: Panic, Average Confidence: 0.79, Accuracy: 0.21
Emotion: Sadness, Average Confidence: 0.69, Accuracy: 0.75
Emotion: Neutral, Average Confidence: 0.00, Accuracy: 0.00
