In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import os
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from transformers import DeiTForImageClassification, DeiTFeatureExtractor
import safetensors.torch
from sklearn.metrics import accuracy_score, classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')
# 데이터 변환 설정
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

model = DeiTForImageClassification.from_pretrained('facebook/deit-base-distilled-patch16-224', num_labels=4)
state_dict = safetensors.torch.load_file('model/model.safetensors')
model.load_state_dict(state_dict)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DeiTForImageClassification(
  (deit): DeiTModel(
    (embeddings): DeiTEmbeddings(
      (patch_embeddings): DeiTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): DeiTEncoder(
      (layer): ModuleList(
        (0-11): 12 x DeiTLayer(
          (attention): DeiTAttention(
            (attention): DeiTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): DeiTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): DeiTIntermediate(
            (dense): Linear(in

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from skimage import color
from tensorflow.keras.preprocessing import image
import dlib
from mtcnn import MTCNN

In [4]:
import json

with open ('label/test_anger.json', "r", encoding='euc-kr') as f:
    anger_label = json.load(f)
with open ('label/test_happy.json', "r", encoding='euc-kr') as f:
    happy_label = json.load(f)
with open ('label/test_panic.json', "r", encoding='euc-kr') as f:
    panic_label = json.load(f)
with open ('label/test_sadness.json', "r", encoding='euc-kr') as f:
    sadness_label = json.load(f)

In [5]:
import tensorflow as tf
import os
import logging
from mtcnn import MTCNN

def extract_face_info_mtcnn(img):

  detector = MTCNN() # MTCNN 인스턴스 생성

  faces = detector.detect_faces(img) # 얼굴 감지
  #face_info_list = [] # 감지된 얼굴 정보를 저장할 리스트 // 하나의 이미지에 다수의 얼굴이 존재할 경우 사용

  # 감지된 모든 얼굴에 대해 반복
  for face in faces:
    # 얼굴의 바운딩 박스와 특징점 추출
    bounding_box = face['box']
    keypoints = face['keypoints']

    # 추출한 정보를 딕셔너리로 저장
    face_info = {
      'bounding_box': bounding_box,
      'keypoints': keypoints
    }

    # 얼굴 정보 리스트에 추가
    #face_info_list.append(face_info)

  return face_info


def calculate_angle(face_info):
  left_eye = face_info['keypoints']['left_eye']
  right_eye = face_info['keypoints']['right_eye']

  return np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]) * 180 / np.pi

def rotate_image(image, img_info):
  angle = calculate_angle(img_info)
  # 이미지의 중심 탐색
  height, width = image.shape[:2]
  center = (width / 2, height / 2)

  # 회전 변환 매트릭스 생성
  rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

  # 이미지 회전
  rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))

  return rotated_image

def normalize_face(image, face_info, output_size=(224, 224)):
    landmarks = face_info['keypoints']
    left_eye = np.array(landmarks['left_eye'])
    right_eye = np.array(landmarks['right_eye'])
    nose_tip = np.array(landmarks['nose'])

    # 눈 사이 거리
    eye_distance = np.linalg.norm(left_eye - right_eye)

    # 스케일링 비율
    desired_eye_distance = 0.3 * output_size[0]
    scale = desired_eye_distance / eye_distance

    # 눈 사이 거리 정규화
    M = np.array([[scale, 0, 0], [0, scale, 0]])
    scaled_image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

    # 코 끝을 기준으로 중심 이동
    nose_center = nose_tip * scale
    offset_x = (output_size[0] / 2) - nose_center[0]
    offset_y = (output_size[1] / 2) - nose_center[1]
    M = np.array([[1, 0, offset_x], [0, 1, offset_y]])
    normalized_image = cv2.warpAffine(scaled_image, M, output_size)

    return normalized_image

In [6]:
# 이미지 가져오기
def get_image(filename, path):
  image = cv2.imread(path + filename)
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  return image.copy()

# Label 데이터를 기반으로 이미지 Crop하는 함수
def crop_face(label_data, img):

  boxes = [label_data['annot_A']['boxes'], label_data['annot_B']['boxes'], label_data['annot_C']['boxes']]
  avg_box = { # annot_A/B/C 각 좌표의 평균 값 구해서 저장
      'maxX': np.mean([box['maxX'] for box in boxes]),
      'maxY': np.mean([box['maxY'] for box in boxes]),
      'minX': np.mean([box['minX'] for box in boxes]),
      'minY': np.mean([box['minY'] for box in boxes])
  }

  new_image = img[int(avg_box['minY']):int(avg_box['maxY']), int(avg_box['minX']):int(avg_box['maxX'])] # 평균값으로 이미지 crop

  return new_image

In [14]:
import torch.nn.functional as F


# 감정 레이블 정의


emotion_labels = ['anger', 'happy', 'panic', 'sadness']

# 이미지 전처리
def preprocess_image(label, path):
    img = get_image(label['filename'], path)  # 이미지 불러오기
    img = crop_face(label, img)  # 이미지 크롭
    img_info = extract_face_info_mtcnn(img)  # 이미지에서 얼굴 정보 추출
    
    if not img_info:
        raise ValueError("No face detected in the image.")
    
    img = rotate_image(img, img_info)  # 눈 수평 회전
    img = normalize_face(img, img_info)  # 코 기준 이미지 센터로 이동
    
    # Convert image to RGB if it's not
    if len(img.shape) == 2 or img.shape[2] == 1:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    
    img = cv2.resize(img, (224, 224))  # 이미지 크기 조정
    
    img_array = img.astype('float32') / 255.0  # 0~1 사이 값으로 정규화
    img_array = np.transpose(img_array, (2, 0, 1))  # 채널 순서 변경 (HWC -> CHW)
    
    # 텐서 변환
    img_tensor = torch.tensor(img_array).unsqueeze(0)  # 배치 차원 추가
    
    return img_tensor.to(device)

# 감정 분류 함수
def predict_emotion(img_tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(img_tensor)
        logits = outputs.logits.cpu().numpy()
        probabilities = F.softmax(torch.tensor(logits), dim=1).numpy()  # Softmax 적용
        predicted_class = np.argmax(probabilities, axis=1)[0]
        predicted_emotion = emotion_labels[predicted_class]
        confidence = probabilities[0][predicted_class]
    return predicted_emotion, confidence

# 이미지 전처리 및 감정 예측
def get_predict_accuracy(label, path, true_emotion):
  correct_count = 0
  error_count = 0
  confidence_count = 0

  for i in range(len(label)):
    try:
      img_array = preprocess_image(label[i], path)
      predicted_emotion, confidence = predict_emotion(img_array)
      #true_emotion = label[i]['faceExp_uploader']
    except Exception as e:
      error_count += 1
      continue

    if predicted_emotion == true_emotion:
      correct_count += 1
      confidence_count += confidence

    filename = label[i]['filename']
    print(f'{i+1}, filename : {filename}')
    print(f'예측 : {predicted_emotion}\n실제 : {true_emotion}\n신뢰도 : {confidence:.2f}')

  return correct_count, error_count, confidence_count

In [15]:
anger_correct_count, anger_error_count, anger_confidence_count = get_predict_accuracy(anger_label, 'img/test/anger/', 'anger')
happy_correct_count, happy_error_count, happy_confidence_count = get_predict_accuracy(happy_label, 'img/test/happy/', 'happy')
panic_correct_count, panic_error_count, panic_confidence_count = get_predict_accuracy(panic_label, 'img/test/panic/', 'panic')
sadness_correct_count, sadness_error_count, sadness_confidence_count = get_predict_accuracy(sadness_label, 'img/test/sadness/', 'sadness')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82

In [16]:
entire_correct_count = anger_correct_count + happy_correct_count + panic_correct_count + sadness_correct_count
entire_error_count = anger_error_count + happy_error_count + panic_error_count + sadness_error_count
entire_lable_length = len(anger_label) + len(happy_label) + len(panic_label) + len(sadness_label)
entire_emotion_accuracy = entire_correct_count / (entire_lable_length - entire_error_count) * 100

In [17]:
# 예측을 위해 테스트 데이터 전처리를 시도했지만 전처리에 실패한 개수
print(f'Anger Error Image Count : {anger_error_count}')
print(f'Happy Error Image Count : {happy_error_count}')
print(f'Panic Error Image Count : {panic_error_count}')
print(f'Sadness Error Image Count : {sadness_error_count}')

Anger Error Image Count : 4
Happy Error Image Count : 8
Panic Error Image Count : 5
Sadness Error Image Count : 4


In [18]:
# 예측 감정과 정답 감정이 일치한 개수
print(f'Anger Correct Count : {anger_correct_count}')
print(f'Happy Correct Count : {happy_correct_count}')
print(f'Panic Correct Count : {panic_correct_count}')
print(f'Sadness Correct Count : {sadness_correct_count}')

Anger Correct Count : 199
Happy Correct Count : 277
Panic Correct Count : 238
Sadness Correct Count : 221


In [19]:
# 예측 감정과 정답 감정이 일치할 때 신뢰도의 평균
print(f'Mean of Anger confidence : {anger_confidence_count/anger_correct_count:.2f}')
print(f'Mean of Happy confidence : {happy_confidence_count/happy_correct_count:.2f}')
print(f'Mean of Panic confidence : {panic_confidence_count/panic_correct_count:.2f}')
print(f'Mean of Sadness confidence : {sadness_confidence_count/sadness_correct_count:.2f}')

Mean of Anger confidence : 0.96
Mean of Happy confidence : 0.99
Mean of Panic confidence : 0.98
Mean of Sadness confidence : 0.97


In [20]:
# 감정별 예측 정확도
print(f'Anger Predict Accuracy : {anger_correct_count / (len(anger_label) - anger_error_count) * 100:.2f}%')
print(f'Happy Predict Accuracy : {happy_correct_count / (len(happy_label) - happy_error_count) * 100:.2f}%')
print(f'Panic Predict Accuracy : {panic_correct_count / (len(panic_label) - panic_error_count) * 100:.2f}%')
print(f'Sadness Predict Accuracy : {sadness_correct_count / (len(sadness_label) - sadness_error_count) * 100:.2f}%')
# 전체 테스트 데이터셋에 대한 감정 예측 정확도
print(f'\nEntire Emotion Accuracy : {entire_emotion_accuracy:.2f}%')

Anger Predict Accuracy : 72.10%
Happy Predict Accuracy : 95.52%
Panic Predict Accuracy : 88.15%
Sadness Predict Accuracy : 78.93%

Entire Emotion Accuracy : 83.78%


In [None]:
def print_faceExp_count(counts, emotion):
    print(f"Counts for emotion '{emotion}'")
    print(f"  Zero annotation same   : {counts['zero']}")
    print(f"  One annotation same    : {counts['one']}")
    print(f"  Two annotations same   : {counts['two']}")
    print(f"  Three annotations same : {counts['three']}")
    print(f"  Sum of two, three : {counts['two'] + counts['three']}")
    print(f"  Sum of all        : {counts['zero'] + counts['one'] + counts['two'] + counts['three']}")

def verify_faceExp_consistent(label, emotion):
  counts = {'zero': 0, 'one': 0, 'two': 0, 'three': 0}
  verified_data = []

  for i in range(len(label)):
    emotion_count = 0
    for annot in ['annot_A', 'annot_B', 'annot_C']:
      if label[i][annot]['faceExp'] == emotion:
        emotion_count += 1

    if emotion_count == 1:
      counts['one'] += 1
    elif emotion_count == 2: # 2개 이상 일치시 리스트 추가
      counts['two'] += 1
      verified_data.append(label[i])
    elif emotion_count == 3: # 3개 일치시 리스트 추가
      counts['three'] += 1
      verified_data.append(label[i])
    else :
      counts['zero'] += 1

  print_faceExp_count(counts, emotion)

  return verified_data

In [None]:
verified_anger = verify_faceExp_consistent(anger_label, '분노')
verified_happy = verify_faceExp_consistent(happy_label, '기쁨')
verified_panic = verify_faceExp_consistent(panic_label, '당황')
verified_sadness = verify_faceExp_consistent(sadness_label, '슬픔')