# Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp -r "/content/drive/My Drive/ml/edyo/project" .
!unzip /content/project/dataset/lfw-deepfunneled.zip -d /content/project/dataset/lfw

In [3]:
cd /content/project

/content/project


In [None]:
!pip install face_recognition
!pip install faiss-cpu
!pip install insightface==0.3.5
!pip install onnxruntime
!pip install onnxruntime-gpu
!pip install mxnet

In [5]:
output_path = "/content/drive/My Drive/ml/edyo/project/output"

In [6]:
import os

import cv2
import numpy as np
import pandas as pd

import faiss

import utils.video_reader
import utils.recognizers
import importlib
importlib.reload(utils.video_reader)
importlib.reload(utils.recognizers)

from utils.video_reader import VideoReader
from utils.recognizers import FaceRecognizer, InsightFaceRecognizer

import glob
import time
import tqdm
import sys

import dlib
dlib.DLIB_USE_CUDA

True

# Build index

In [7]:
def create_index(index_path, embeddings_path, recognizer, dimensions=512):
    embeddings = []
    failed = []
    emdeddings_df = pd.DataFrame([], columns=["person_name", "photo_path"])

    for person_name in tqdm.tqdm(os.listdir(lfw_photo), file=sys.stdout):
        images_path = f"{lfw_photo}/{person_name}"
        for image_name in os.listdir(images_path):
            photo_path = f"{images_path}/{image_name}"
            try:
                image = cv2.imread(photo_path)[:,:,::-1]
                encoding = recognizer.get_encodings(image)

                embeddings.append(recognizer.get_face_embedding(encoding))
                emdeddings_df = emdeddings_df.append({"person_name":person_name, "photo_path":photo_path}, ignore_index=True)

            except Exception as e:
                failed.append(f"{photo_path}")

    embeddings = np.array(embeddings, dtype=np.float32)
    emdeddings_df.to_csv(embeddings_path, index=False)

    print(f"Processed images: {len(embeddings)}")
    print(f"Failed images: {len(failed)}")

    index = faiss.IndexFlatL2(dimensions)
    index.add(embeddings)
    faiss.write_index(index, index_path)


def add_new_person(embeddings_path, index_path, recognizer, person_name, photo_path):
    embeddings_df = pd.read_csv(embeddings_path)
    index = faiss.read_index(index_path)

    image = cv2.imread(photo_path)[:, :, ::-1]
    encoding = recognizer.get_encodings(image)
    try:
        index.add(np.array([recognizer.get_face_embedding(encoding)], dtype=np.float32))
        faiss.write_index(index, index_path)
        
        embeddings_df = embeddings_df.append({"person_name": person_name, "photo_path": photo_path}, ignore_index=True)
        embeddings_df.to_csv(embeddings_path, index=False)
    except ValueError as e:
        print(e)

In [None]:
lfw_path = './dataset/lfw'
lfw_photo = f"{lfw_path}/lfw-deepfunneled/lfw-deepfunneled"

photo_path = "./dataset/gans Hanna Shubina.jpg"
person_name = "Hanna Shubina"

## Face recognition + Faiss

In [None]:
embeddings_path = f"{output_path}/face_recognition_embeddings.csv"
index_path = f"{output_path}/face_recognition_vector.index"
recognizer = FaceRecognizer(ctx_id=0)
dimensions = 128

create_index(index_path, embeddings_path, recognizer, dimensions=dimensions)

100%|██████████| 5749/5749 [04:16<00:00, 22.42it/s]
Processed images: 11884
Failed images: 1349


In [None]:
add_new_person(embeddings_path, index_path, recognizer, person_name, photo_path)

embeddings_df = pd.read_csv(embeddings_path)
embeddings_df.iloc[-5:]

Unnamed: 0,person_name,photo_path
11880,Daniel_Barenboim,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
11881,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
11882,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
11883,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
11884,Hanna Shubina,./dataset/gans Hanna Shubina.jpg


## Insightface + Faiss

In [None]:
embeddings_path = f"{output_path}/insightface_embeddings.csv"
index_path = f"{output_path}/insightface_vector.index"
recognizer = InsightFaceRecognizer(ctx_id=1)
dimensions = 512

create_index(index_path, embeddings_path, recognizer, dimensions=dimensions)

model ignore: /root/.insightface/models/antelopev2/1k3d68.onnx landmark_3d_68
model ignore: /root/.insightface/models/antelopev2/2d106det.onnx landmark_2d_106
model ignore: /root/.insightface/models/antelopev2/genderage.onnx genderage
find model: /root/.insightface/models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5
find model: /root/.insightface/models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0
set det-size: (640, 640)
100%|██████████| 5749/5749 [07:02<00:00, 13.61it/s]
Processed images: 10934
Failed images: 2299


In [None]:
add_new_person(embeddings_path, index_path, recognizer, person_name, photo_path)

embeddings_df = pd.read_csv(embeddings_path)
embeddings_df.iloc[-5:]

Unnamed: 0,person_name,photo_path
10930,Daniel_Barenboim,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
10931,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
10932,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
10933,Noah_Wyle,./dataset/lfw/lfw-deepfunneled/lfw-deepfunnele...
10934,Hanna Shubina,./dataset/gans Hanna Shubina.jpg


# Process video

In [8]:
annotations_df = pd.read_csv(f"./dataset/Milestone 1 - Dataset.csv")
annotations_df[:5]

Unnamed: 0,Video,Unnamed: 1,Person in camera,Unnamed: 3,Unnamed: 4,Light conditions,Unnamed: 6,Unnamed: 7,Face,Unnamed: 9,Unnamed: 10,Background,Unnamed: 12,Engagment,Unnamed: 14,Mood,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Video quality,Unnamed: 21,Unnamed: 22,Noise
0,,,No person,One person,Multiple persons,Dark,Normal,Light,Visible,Not facing camera,Occluded,Plain,Messy,Involved,Distracted,Bored,Surprised,Smiling,Neutural,Interested,Low\n(640x360),Normal\n(960x540),High (1920x1080),
1,,52.0,3,112,0,0,112,0,112,15,2,112,0,112,16,2,4,1,112,8,0,0,112,0.0
2,video_1_0000.mp4,9.0,,+,,,+,,+,+,,+,,+,+,,,,+,,,,+,
3,video_1_0001.mp4,10.0,+,+,,,+,,+,+,,+,,+,+,,,,+,,,,+,
4,video_1_0002.mp4,7.0,,+,,,+,,+,,,+,,+,,,,,+,,,,+,


In [9]:
annotated_df = annotations_df[annotations_df.iloc[:, 1].notna()]
face_annotations_df = annotated_df.iloc[1:, [0,2,3,4]].copy()
face_annotations_df.columns = ["video_name", "no_face", "one_face", "multiple_faces"]
face_annotations_df.reset_index(drop=True, inplace=True)
face_annotations_df[:5]

Unnamed: 0,video_name,no_face,one_face,multiple_faces
0,video_1_0000.mp4,,+,
1,video_1_0001.mp4,+,+,
2,video_1_0002.mp4,,+,
3,video_1_0003.mp4,,+,
4,video_1_0004.mp4,,+,


In [10]:
def process_video(video_path, recognizer, skip_frames):
    vr = VideoReader(video_path)
    
    info_df = pd.DataFrame([], columns=["frame", "num_faces", "person_name", "distance", "process_time"])
    while vr.has_next():
        frame = vr.read(skip=skip_frames, rgb=True)
        
        try:
            start_time = time.time()
            face_encodings = recognizer.get_encodings(frame)
            person_name = None
            distance = None
            distance, person_name = recognizer.recognize_face(face_encodings)

            process_time = time.time() - start_time
            info_df = info_df.append({"frame": vr.get_frame_idx(), "num_faces": len(face_encodings),
                                        "person_name": person_name, "distance": distance,
                                        "process_time": process_time}, ignore_index=True)
        except:
            info_df = info_df.append({"frame": vr.get_frame_idx(), "num_faces": len(face_encodings),
                                      "person_name": None, "distance": None,
                                      "process_time": None}, ignore_index=True)
        
    return info_df, vr.fps, vr.video_length

def run(recognizer, videos_list, videos_path, recognizer_prefix, output_path):
    video_qualities = ["low", "medium", "high"] #["low"]#
    devices = ["gpu"] #["cpu", "gpu"]
    skips = [5, 10, 20, 30] #[30]#
    summary_df = pd.DataFrame([], columns=["video_path", "device", "skip_frames", "video_length_s", "fps", "processed_time", "output_fps"])

    # TODO simplify
    for video_name in tqdm.tqdm(videos_list, file=sys.stdout):
        for video_quality in video_qualities:
            video_path = f"{videos_path}/{video_quality}/{video_name}"

            for device in devices:
                for skip_frames in skips:
                    info_df, fps, video_length = process_video(video_path, recognizer, skip_frames=skip_frames)
                    info_df.to_csv(f"{output_path}/{video_quality}/{recognizer_prefix}_{video_name}_{device}_{skip_frames}.csv", index=False)
                    
                    video_length_s = video_length/fps
                    processed_time = info_df['process_time'].sum()
                    output_fps = video_length/processed_time
                    summary_df = summary_df.append({"video_path": video_path, "device":device, "skip_frames": skip_frames, 
                                                    "video_length_s": video_length_s, "fps": fps,
                                                    "processed_time": processed_time, "output_fps": output_fps},
                                                ignore_index=True)
                    
    summary_df.to_csv(f"{output_path}/{recognizer_prefix}_summary.csv", index=False)

In [11]:
os.makedirs(f"{output_path}/low", exist_ok=True)
os.makedirs(f"{output_path}/medium", exist_ok=True)
os.makedirs(f"{output_path}/high", exist_ok=True)

videos_list = face_annotations_df["video_name"].values
print(f"{len(videos_list)} videos")

52 videos


## Face recognition + Faiss

In [None]:
embeddings_path = f"{output_path}/face_recognition_embeddings.csv"
index_path = f"{output_path}/face_recognition_vector.index"
recognizer = FaceRecognizer(embeddings_path, index_path, ctx_id=0)

run(recognizer, videos_list, "./dataset/video", "fr", output_path)




  0%|          | 0/52 [00:00<?, ?it/s][A[A[A


  2%|▏         | 1/52 [04:13<3:35:25, 253.44s/it][A[A[A


  4%|▍         | 2/52 [08:21<3:29:57, 251.95s/it][A[A[A


  6%|▌         | 3/52 [12:41<3:27:32, 254.12s/it][A[A[A


  8%|▊         | 4/52 [16:57<3:23:49, 254.78s/it][A[A[A


 10%|▉         | 5/52 [21:14<3:20:08, 255.50s/it][A[A[A


 12%|█▏        | 6/52 [25:29<3:15:46, 255.37s/it][A[A[A


 13%|█▎        | 7/52 [29:48<3:12:18, 256.42s/it][A[A[A


 15%|█▌        | 8/52 [34:05<3:08:15, 256.70s/it][A[A[A


 17%|█▋        | 9/52 [38:26<3:04:42, 257.73s/it][A[A[A


 19%|█▉        | 10/52 [42:39<2:59:34, 256.53s/it][A[A[A


 21%|██        | 11/52 [46:55<2:55:07, 256.28s/it][A[A[A


 23%|██▎       | 12/52 [51:13<2:51:09, 256.73s/it][A[A[A


 25%|██▌       | 13/52 [55:31<2:47:07, 257.13s/it][A[A[A


 27%|██▋       | 14/52 [59:46<2:42:25, 256.47s/it][A[A[A


 29%|██▉       | 15/52 [1:04:01<2:37:59, 256.21s/it][A[A[A


 31%|███       | 16/52 [

## Insightface + Faiss

In [14]:
embeddings_path = f"{output_path}/insightface_embeddings.csv"
index_path = f"{output_path}/insightface_vector.index"
recognizer = InsightFaceRecognizer(embeddings_path, index_path, ctx_id=0)

run(recognizer, videos_list, "./dataset/video", "if", output_path)

download_path: /root/.insightface/models/antelopev2
Downloading /root/.insightface/models/antelopev2.zip from http://storage.insightface.ai/files/models/antelopev2.zip...


100%|██████████| 352289/352289 [00:06<00:00, 57006.66KB/s]


model ignore: /root/.insightface/models/antelopev2/1k3d68.onnx landmark_3d_68
model ignore: /root/.insightface/models/antelopev2/2d106det.onnx landmark_2d_106
model ignore: /root/.insightface/models/antelopev2/genderage.onnx genderage
find model: /root/.insightface/models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5
find model: /root/.insightface/models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0
set det-size: (640, 640)
100%|██████████| 52/52 [1:18:06<00:00, 90.13s/it]
