<a href="https://colab.research.google.com/github/walternagai-uminho/colab-tests/blob/main/Extract_frames_from_video_and_emotions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Extraindo imagens de vídeos

## Configurações iniciais

### Bibliotecas necessárias

In [None]:
import os

import cv2
import dlib
import imutils
import numpy as np
from google.colab import drive
from google.colab.patches import cv2_imshow
from imutils import face_utils
from scipy.spatial import distance as dist

### Montando a pasta do Google Drive

In [None]:
# Mount Google Drive under /content/drive so the video stored there can be read;
# force_remount=True asks Colab for a fresh mount on every run of this cell.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Size, in pixels, that every cropped face is resized to before being saved.
WIDTH = 224
HEIGHT = 224

# Eye-aspect-ratio cut-off: a face whose mean EAR is >= this value is
# treated as having open eyes.
EYE_AR_THRESH = 0.18

# Sampling interval, in seconds, between the frames extracted from the video.
AMOSTRAGEM_SEGUNDOS = 10

## Abrindo, analisando e retirando imagens do vídeo

### Importando um vídeo e obtendo suas características

In [None]:
video_file_path = '/content/drive/MyDrive/Colab Notebooks/scrum-sessao-01-beg-01m-02m-30fps.mp4'

# Open the video and fail early with a clear message when it cannot be read
# (e.g. Drive not mounted or wrong path). Otherwise the FPS comes back as 0
# and the duration computation below dies with an opaque ZeroDivisionError.
cap = cv2.VideoCapture(video_file_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_file_path}")

# Frame rate (truncated to an integer) and total number of frames as
# reported by the capture backend.
n_fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if n_fps <= 0:
    cap.release()
    raise ValueError(f"Invalid frame rate ({n_fps}) reported for: {video_file_path}")

# Duration of the video derived from the frame count.
seconds = frame_count // n_fps
minutes, rem_sec = divmod(seconds, 60)

print(f"n_fps = {n_fps}, frame_count = {frame_count}")
print(f"seconds = {seconds}, minutes = {minutes}, rem_sec: {rem_sec}")

n_fps = 30, frame_count = 1802
seconds = 60, minutes = 1, rem_sec: 0


### Retirando frames do vídeo de acordo com o tempo em segundos

In [None]:
# Folder that receives the sampled frames. It is created with the same
# absolute path that is used for writing, so the cell does not depend on
# the notebook's current working directory.
extracted_images_dir = "/content/extracted_images"
os.makedirs(extracted_images_dir, exist_ok=True)

# The capture is released at the end of this cell; re-open it so that
# re-running the cell does not silently produce an empty image_list.
if not cap.isOpened() and not cap.open(video_file_path):
    raise FileNotFoundError(f"Could not open video: {video_file_path}")

# Number of frames between two consecutive samples.
frame_step = n_fps * AMOSTRAGEM_SEGUNDOS
if frame_step <= 0:
    cap.release()
    raise ValueError(f"Invalid sampling step: n_fps={n_fps}, AMOSTRAGEM_SEGUNDOS={AMOSTRAGEM_SEGUNDOS}")

image_list = []
frame_count = 0

try:
    success, image = cap.read()
    while success:
        if frame_count % frame_step == 0:
            # Files are named after the timestamp (whole seconds) of the frame.
            frame_time = frame_count // n_fps
            filename = os.path.join(extracted_images_dir, f"image_{frame_time}.jpg")
            # Only keep paths that were actually written, so later cells
            # never try to read a file that does not exist.
            if cv2.imwrite(filename, image):
                image_list.append(filename)
            else:
                print(f"Could not write {filename}")

        frame_count += 1
        success, image = cap.read()
finally:
    # Always free the capture, even if reading or writing fails midway.
    cap.release()

## Detectando faces usando haarcascade

### Obtendo arquivos para detecção de rostos e olhos

In [None]:
# Load the cascade  
![ !"haarcascade_frontalface_default.xml" ] && wget "https://github.com/kipr/opencv/raw/master/data/haarcascades/haarcascade_frontalface_default.xml"
![ !"haarcascade_eye.xml" ] && wget "https://github.com/kipr/opencv/raw/master/data/haarcascades/haarcascade_eye.xml"

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')  
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

--2023-04-15 14:41:20--  https://github.com/kipr/opencv/raw/master/data/haarcascades/haarcascade_frontalface_default.xml
Resolving github.com (github.com)... 192.30.255.113
Connecting to github.com (github.com)|192.30.255.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/kipr/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml [following]
--2023-04-15 14:41:20--  https://raw.githubusercontent.com/kipr/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1254733 (1.2M) [text/plain]
Saving to: ‘haarcascade_frontalface_default.xml’


2023-04-15 14:41:21 (33.2 MB/s) - ‘haarcascade_frontalface_default.xml’ saved [125473

### Obtendo as faces e olhos das imagens

In [None]:
# Folder that receives the cropped faces. It is created with the same
# absolute path that is used for writing.
extracted_faces_dir = "/content/extracted_faces"
os.makedirs(extracted_faces_dir, exist_ok=True)

# An eye detection is only drawn when its width (in pixels, measured on the
# WIDTH x HEIGHT face crop) lies strictly between these two bounds.
EYE_MIN_WIDTH = 35
EYE_MAX_WIDTH = 82

face_list = []
for image_count, image in enumerate(image_list, start=1):
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None instead of raising when a file is unreadable.
        print(f"Could not read {image}; skipping")
        continue

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 6)

    for face_count, (x, y, w, h) in enumerate(faces, start=1):
        # Crop and resize BEFORE drawing the bounding box, so the box outline
        # never ends up inside the crop handed to the eye detector.
        roi_gray_resized = cv2.resize(gray[y:y + h, x:x + w], (WIDTH, HEIGHT))
        roi_color_resized = cv2.resize(img[y:y + h, x:x + w], (WIDTH, HEIGHT))
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)

        eyes = eye_cascade.detectMultiScale(roi_color_resized)
        # A face is kept only when more than one eye region was detected.
        if len(eyes) > 1:
            eye_drawn = False
            for (ex, ey, ew, eh) in eyes:
                if EYE_MIN_WIDTH < ew < EYE_MAX_WIDTH:
                    cv2.rectangle(roi_color_resized, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
                    eye_drawn = True
            # Show the annotated crop once per face instead of once per eye.
            if eye_drawn:
                cv2_imshow(roi_color_resized)

            filename = os.path.join(
                extracted_faces_dir,
                f"{os.path.basename(image)}-{image_count}-{face_count}.jpg",
            )
            # Only record faces that were actually written to disk.
            if cv2.imwrite(filename, roi_gray_resized):
                face_list.append(filename)
            else:
                print(f"Could not write {filename}")

## Detectando faces usando a biblioteca dlib

### Criando pastas

In [None]:
![ ! -d "faces_attention" ] && mkdir -p "faces_attention"
![ ! -d "faces_noattention" ] && mkdir -p "faces_noattention"

### Importando arquivo de dados para detecção de faces

In [None]:
# https://github.com/italojs/facial-landmarks-recognition
# https://github.com/JeffTrain/selfie

![ !"shape_predictor_68_face_landmarks.dat" ] && wget https://github.com/JeffTrain/selfie/raw/master/shape_predictor_68_face_landmarks.dat

detector = dlib.get_frontal_face_detector()
detector_pontos = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
distancia_olhos_atencao = (20, 60)

--2023-04-14 19:28:29--  https://github.com/JeffTrain/selfie/raw/master/shape_predictor_68_face_landmarks.dat
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/JeffTrain/selfie/master/shape_predictor_68_face_landmarks.dat [following]
--2023-04-14 19:28:30--  https://raw.githubusercontent.com/JeffTrain/selfie/master/shape_predictor_68_face_landmarks.dat
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 99693937 (95M) [application/octet-stream]
Saving to: ‘shape_predictor_68_face_landmarks.dat’


2023-04-14 19:28:31 (233 MB/s) - ‘shape_predictor_68_face_landmarks.dat’ saved [99693937/99693937]



### Detecção dos olhos das faces

Cada olho é representado por 6 coordenadas (x, y), começando no canto esquerdo do olho (como se você estivesse olhando para a pessoa) e, em seguida, percorrendo o restante da região no sentido horário:

[Imagem das 6 coordenadas (x, y)](https://b2633864.smushcdn.com/2633864/wp-content/uploads/2017/04/blink_detection_6_landmarks.jpg?lossy=1&strip=1&webp=1)

Esse cálculo considera os pontos de referência (landmarks) descritos no trabalho de [Soukupová e Čech (2016)](http://vision.fe.uni-lj.si/cvww2016/proceedings/papers/05.pdf).

$ear = \frac{||p_2 - p_6|| + ||p_3 - p_5||}{2 * ||p_1 - p_4||}$

$ear$ é a razão de aspecto de olho ou *eye aspect ratio*.

In [None]:
# https://pyimagesearch.com/2017/04/24/eye-blink-detection-opencv-python-dlib/

# (start, end) bounds used to slice each eye's points out of the array of
# facial landmarks.
lStart, lEnd = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
rStart, rEnd = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) of a single eye.

    Args:
        eye: indexable sequence of six (x, y) landmark points. Points 1/5 and
            2/4 are used as the two vertical pairs, points 0/3 as the
            horizontal pair.

    Returns:
        The sum of the two vertical Euclidean distances divided by twice the
        horizontal Euclidean distance.
    """
    # Two vertical distances across the eye.
    vertical_a = dist.euclidean(eye[1], eye[5])
    vertical_b = dist.euclidean(eye[2], eye[4])
    # Horizontal distance between the points at indices 0 and 3.
    horizontal = dist.euclidean(eye[0], eye[3])
    return (vertical_a + vertical_b) / (2.0 * horizontal)

In [None]:
# Classify every detected face as "attention" (eyes open) or "noattention"
# (eyes closed / blinking) from its eye aspect ratio, and save the grayscale
# crop into the matching folder. Only the attention faces go into face_list.
face_list = []
for image_count, image in enumerate(image_list, start=1):
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None instead of raising when a file is unreadable.
        print(f"Could not read {image}; skipping")
        continue

    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_h, img_w = img_gray.shape[:2]
    faces = detector(img_gray, 1)
    base_name = image.split('/')[-1]

    for face_count, face in enumerate(faces, start=1):
        # Facial landmarks of this face as a NumPy array of (x, y) points.
        pontos = face_utils.shape_to_np(detector_pontos(img_gray, face))

        # Eye landmarks and the eye aspect ratio averaged over both eyes.
        leftEye = pontos[lStart:lEnd]
        rightEye = pontos[rStart:rEnd]
        ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0

        # The detector may return a box that sticks out of the image; clamp
        # it, because a negative start index would wrap around and produce an
        # empty crop that cv2.resize rejects.
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            continue
        face_gray_resized = cv2.resize(img_gray[y0:y1, x0:x1], (WIDTH, HEIGHT))

        # Eyes count as open when the EAR reaches EYE_AR_THRESH.
        eyes_open = ear >= EYE_AR_THRESH
        target_dir = "faces_attention" if eyes_open else "faces_noattention"
        # Both classes share the same <image>-<image index>-<face index>.jpg pattern.
        filename = f"{target_dir}/{base_name}-{image_count}-{face_count}.jpg"
        cv2.imwrite(filename, face_gray_resized)

        if eyes_open:
            face_list.append(filename)

            # Outline both eyes (convex hull of their landmarks) on the
            # colour image that is displayed below.
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            cv2.drawContours(img, [leftEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(img, [rightEyeHull], -1, (0, 255, 0), 1)

    cv2_imshow(img)


## Detectando faces usando o DeepFace

### Instalando a biblioteca DeepFace

In [None]:
!pip install deepface

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting deepface
  Downloading deepface-0.0.79-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting fire>=0.4.0
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0
  Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting mtcnn>=0.1.0
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting retina-face>=0.0.1
  Downloading retina_face-0.0.

In [None]:
from deepface import DeepFace

Directory  /root /.deepface created
Directory  /root /.deepface/weights created


### Métricas de comparação

In [None]:
# Names of distance metrics.
# NOTE(review): not referenced by any other visible cell — presumably meant
# for DeepFace's distance metric option; confirm before relying on it.
metrics = [
    "cosine",
    "euclidean",
    "euclidean_l2",
]

### Detectando faces usando models

In [None]:
# Names of face-recognition models.
# NOTE(review): not referenced by any other visible cell — presumably meant
# for DeepFace's model selection; confirm before relying on it.
models = [
    "VGG-Face",
    "Facenet",
    "Facenet512",
    "OpenFace",
    "DeepFace",
    "DeepID",
    "ArcFace",
    "Dlib",
    "SFace",
]

### Detectando faces usando backends

A detecção e o alinhamento facial são estágios iniciais importantes de um pipeline moderno de reconhecimento facial. Experimentos mostram que o alinhamento, por si só, aumenta a precisão do reconhecimento facial em quase 1%. Os detectores OpenCV, SSD, Dlib, MTCNN, RetinaFace e MediaPipe estão encapsulados na biblioteca deepface.

In [None]:
# Names of face-detector backends; the verification cell passes the first
# entry as `detector_backend`.
backends = [
    "opencv",
    "ssd",
    "dlib",
    "mtcnn",
    "retinaface",
    "mediapipe",
]

In [None]:
# Face verification: compare every saved face with the next one in face_list
# and report the consecutive pairs that DeepFace does not verify as a match.
# enforce_detection=False is passed because the inputs are already face crops.
for img1_path, img2_path in zip(face_list, face_list[1:]):
    obj = DeepFace.verify(img1_path=img1_path,
                          img2_path=img2_path,
                          enforce_detection=False,
                          detector_backend=backends[0])
    if not obj["verified"]:
        print(f"{img1_path} is different {img2_path}")

## Detectando emoções a partir das imagens

### Usando o FER

https://github.com/atulapra/Emotion-detection

https://www.edlitera.com/blog/posts/emotion-detection-in-images

https://towardsdatascience.com/the-ultimate-guide-to-emotion-recognition-from-facial-expressions-using-python-64e58d4324ff

https://github.com/rjrahul24/ai-with-python-series

https://github.com/rjrahul24/ai-with-python-series/tree/main/06.%20Emotion%20Recognition%20using%20Facial%20Images

https://github.com/rjrahul24/ai-with-python-series/tree/main/07.%20Emotion%20Recognition%20using%20Live%20Video