<a href="https://colab.research.google.com/github/yash94404/Engine/blob/main/cleanedFaceDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
import shutil
# Start from a clean frame directory; ignore_errors keeps this cell from
# failing when "images" does not exist yet (e.g. on a fresh runtime).
shutil.rmtree("images", ignore_errors=True)

In [None]:
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import os
import insightface
from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image
from PIL import Image
import math


# STEP 2: Create a FaceDetector object from the local 'detector.tflite' model,
# configured with a minimum detection confidence of 0.75.
base_options = python.BaseOptions(model_asset_path='detector.tflite')
options = vision.FaceDetectorOptions(base_options=base_options, min_detection_confidence = 0.75)
detector = vision.FaceDetector.create_from_options(options)
# Face-mesh model; findFacePose runs it to obtain per-face landmarks.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.75, min_tracking_confidence=0.75)
# Drawing helpers; not referenced by the functions visible in this notebook.
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

# InsightFace pipeline; generateClustersNew uses it to get face embeddings.
# NOTE(review): ctx_id=0 presumably selects the first GPU device — confirm
# against the insightface documentation.
app = FaceAnalysis()
app.prepare(ctx_id=0, det_thresh=0.5)

# Font constant; not referenced by the functions visible in this notebook.
FONTS =cv2.FONT_HERSHEY_COMPLEX

# Face-mesh landmark indices, grouped by facial feature. blinkRatio uses the
# eye lists to look up points in the per-image landmark coordinate list.

# Face boundary (oval) indices
FACE_OVAL = [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
    172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
]

# Lip indices: lower lip, upper lip, and both combined (lower first)
LOWER_LIPS = [
    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
    308, 324, 318, 402, 317, 14, 87, 178, 88, 95,
]
UPPER_LIPS = [
    185, 40, 39, 37, 0, 267, 269, 270, 409, 415,
    310, 311, 312, 13, 82, 81, 42, 183, 78,
]
LIPS = LOWER_LIPS + UPPER_LIPS

# Left eye and eyebrow indices
LEFT_EYE = [
    362, 382, 381, 380, 374, 373, 390, 249,
    263, 466, 388, 387, 386, 385, 384, 398,
]
LEFT_EYEBROW = [336, 296, 334, 293, 300, 276, 283, 282, 295, 285]

# Right eye and eyebrow indices
RIGHT_EYE = [
    33, 7, 163, 144, 145, 153, 154, 155,
    133, 173, 157, 158, 159, 160, 161, 246,
]
RIGHT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]

In [73]:
def save_frames_from_videos(video_paths: list, save_path, every_n_frames=20, debug=False):
  '''
  Save every n-th frame of each video in video_paths as a JPEG in save_path.

  Frames are written as "<video name>_<zero-padded frame index>.jpg", where
  the video name is the file name without its directory or extension.

  Args:
    video_paths: list of video file paths to sample.
    save_path: output directory; created (with parents) when missing.
    every_n_frames: a frame is saved when its index is a multiple of this.
    debug: when True, print the path of every frame written.

  Returns:
    Total number of frames successfully written across all videos.
  '''
  def save_frames(video_path):
      '''Write the sampled frames of one video; return how many were saved.'''
      video_name = os.path.splitext(os.path.basename(video_path))[0]
      frames_saved = 0
      vidcap = cv2.VideoCapture(video_path)
      try:
          count = 0
          success, image = vidcap.read()
          while success:
              if count % every_n_frames == 0:
                  # add padding to frame number
                  frame_num = str(count).zfill(6)
                  frame_path = os.path.join(save_path, f'{video_name}_{frame_num}.jpg')
                  # Only count frames that were actually written to disk.
                  if cv2.imwrite(frame_path, image):
                      frames_saved += 1
                      if debug:
                          print(f"saved to {frame_path}")
              success, image = vidcap.read()
              count += 1
      finally:
          # Always release the capture handle, even if a write raises.
          vidcap.release()
      return frames_saved

  os.makedirs(save_path, exist_ok=True)

  # Each call returns a per-video count, so a plain sum gives the true total.
  return sum(save_frames(video_path) for video_path in video_paths)

def genCroppedFace(img, bbox):
  '''
  Crop a rectangular region out of an image file.

  Args:
    img: path of the image file to read with cv2.imread.
    bbox: sequence [x_left, y_top, x_right, y_bottom] in pixels. Values are
      truncated to int and clamped to the image bounds, so a box that pokes
      outside the frame does not wrap around through negative indexing.

  Returns:
    A copy of the cropped pixels in the channel order cv2.imread produced.
    The array is empty when the box does not overlap the image.

  Raises:
    OSError: if the image cannot be read.
  '''
  dframe = cv2.imread(img)
  if dframe is None:
    raise OSError(f"could not read image: {img}")
  image_rows, image_cols = dframe.shape[:2]

  xleft = min(max(int(bbox[0]), 0), image_cols)
  ytop = min(max(int(bbox[1]), 0), image_rows)
  xright = min(max(int(bbox[2]), 0), image_cols)
  ybot = min(max(int(bbox[3]), 0), image_rows)

  # Copy so the caller gets an independent array rather than a view of dframe.
  return dframe[ytop:ybot, xleft:xright].copy()

def getCroppedMediapipe(files):
  '''
  Detect faces in each image and write one cropped image per detection.

  Each crop is saved in the same directory as its source image, named
  "cropped_<detection index>_<source file name>".

  Args:
    files: iterable of image file paths.

  Returns:
    List of the crop file paths that were written, in processing order.
  '''
  allCroppedFaces = []
  for filename in files:
    image = mp.Image.create_from_file(filename)
    # STEP 4: Detect faces in the input image.
    detection_result = detector.detect(image)
    # Split once per file; works for bare names and nested directories alike.
    directory, basename = os.path.split(filename)
    for counter, detection in enumerate(detection_result.detections):
      box = detection.bounding_box
      bbox = [box.origin_x, box.origin_y,
              box.origin_x + box.width, box.origin_y + box.height]
      croppedFace = genCroppedFace(filename, bbox)
      # Skip degenerate crops; there is nothing to write for them.
      if croppedFace is None or croppedFace.size == 0:
        continue
      cropfilename = os.path.join(directory, f"cropped_{counter}_{basename}")
      # Only report crops that actually reached the disk.
      if cv2.imwrite(cropfilename, croppedFace):
        allCroppedFaces.append(cropfilename)
  return allCroppedFaces
def add_padding(filename, amt = 50):
  '''
  Surround an image with a white border and overwrite the file in place.

  Example: add_padding("./newstraightframes/cropped 0 i think you should leave season 2 trailer_000615.jpg", 50)

  Args:
    filename: path of the image to pad; the padded result is saved back here.
    amt: border width in pixels added on each of the four sides.
  '''
  # The context manager closes the source file before the path is overwritten.
  with Image.open(filename) as image:
    width, height = image.size
    new_size = (width + 2 * amt, height + 2 * amt)
    # NOTE(review): the 3-tuple fill assumes a 3-band mode such as RGB —
    # confirm behaviour for grayscale or RGBA inputs.
    result = Image.new(image.mode, new_size, (255, 255, 255))
    result.paste(image, (amt, amt))

  result.save(filename)

def euclaideanDistance(point, point1):
    """Return the straight-line distance between two 2-D points.

    Both arguments must unpack into exactly two coordinates (x, y).
    """
    (ax, ay), (bx, by) = point, point1
    return math.sqrt((bx - ax)**2 + (by - ay)**2)

def blinkRatio(img, landmarks, right_indices, left_indices):
    """Compute an eye width/height ratio and the tilt of the line between the eyes.

    Args:
        img: unused; kept so existing callers keep working.
        landmarks: sequence of (x, y) points indexed by landmark id.
        right_indices: landmark ids outlining the right eye.
        left_indices: landmark ids outlining the left eye.
            For both lists, positions 0 and 8 are taken as the horizontal
            end points and positions 12 and 4 as the vertical end points.

    Returns:
        (ratio, eyeDirection). ratio is the mean over both eyes of horizontal
        distance divided by vertical distance. eyeDirection is "Right" when
        the line between the eye centres is tilted more than 10 degrees,
        "Left" when less than -10 degrees, otherwise "Straight".
        Returns (100, "bad") when either eye has zero vertical extent.
    """
    def eye_points(indices):
        # (horizontal end A, horizontal end B, vertical top, vertical bottom)
        return (landmarks[indices[0]], landmarks[indices[8]],
                landmarks[indices[12]], landmarks[indices[4]])

    def midpoint(a, b):
        return ((a[0] + b[0]) / 2, (a[1] + b[1]) / 2)

    rh_right, rh_left, rv_top, rv_bottom = eye_points(right_indices)
    lh_right, lh_left, lv_top, lv_bottom = eye_points(left_indices)

    # Eye centres are the midpoints of each eye's horizontal end points.
    right_eye_x, right_eye_y = midpoint(rh_right, rh_left)
    left_eye_x, left_eye_y = midpoint(lh_right, lh_left)

    delta_x = right_eye_x - left_eye_x
    delta_y = right_eye_y - left_eye_y

    # arctan2 tolerates delta_x == 0 (no division). Folding into [-90, 90]
    # reproduces the slope angle arctan(delta_y / delta_x), which does not
    # depend on which eye has the smaller x coordinate.
    angle = float(np.degrees(np.arctan2(delta_y, delta_x)))
    if angle > 90:
        angle -= 180
    elif angle < -90:
        angle += 180

    # Margin of 10 degrees: beyond it the face counts as tilted right or left.
    if angle > 10:
        eyeDirection = "Right"
    elif angle < -10:
        eyeDirection = "Left"
    else:
        eyeDirection = "Straight"

    rhDistance = euclaideanDistance(rh_right, rh_left)
    rvDistance = euclaideanDistance(rv_top, rv_bottom)

    lvDistance = euclaideanDistance(lv_top, lv_bottom)
    lhDistance = euclaideanDistance(lh_right, lh_left)

    # A zero vertical extent would divide by zero; report a sentinel instead.
    if rvDistance == 0 or lvDistance == 0:
        return 100, "bad"

    reRatio = rhDistance / rvDistance
    leRatio = lhDistance / lvDistance

    ratio = (reRatio + leRatio) / 2
    return ratio, eyeDirection

def findFacePose(image_path):
  '''
  Estimate head orientation and eye state for the face in an image file.

  The image is mirrored horizontally, run through the module-level face_mesh,
  and six landmarks (ids 33, 263, 1, 61, 291, 199) are passed to cv2.solvePnP
  to recover a rotation. Only the first face returned by the mesh is analysed.

  Args:
    image_path: path of the image to analyse.

  Returns:
    None when the image cannot be read, no face landmarks are found, or the
    pose solve fails. Otherwise the tuple
    (lookingForward, x, y, z, eyesOpen, ratio, eyeDirection), where
    lookingForward is False when x or y falls outside [-30, 30], eyesOpen is
    False when the blinkRatio ratio exceeds 4, and ratio / eyeDirection are
    the values returned by blinkRatio.
  '''
  image = cv2.imread(image_path)
  if image is None:
    return None

  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  image.flags.writeable = False
  results = face_mesh.process(image)
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  img_h, img_w = image.shape[:2]

  if not results.multi_face_landmarks:
    return None

  # One pose is reported per image, so only the first face is used.
  face_landmarks = results.multi_face_landmarks[0]
  pose_landmark_ids = {33, 263, 1, 61, 291, 199}
  face_2d = []
  face_3d = []
  mesh_coord = []
  for idx, lm in enumerate(face_landmarks.landmark):
    px, py = int(lm.x * img_w), int(lm.y * img_h)
    if idx in pose_landmark_ids:
      face_2d.append([px, py])
      face_3d.append([px, py, lm.z])
    mesh_coord.append((px, py))
  face_2d = np.array(face_2d, dtype=np.float64)
  face_3d = np.array(face_3d, dtype=np.float64)

  focal_length = 1 * img_w
  # Camera matrix layout is [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]; the
  # principal point is the image centre, so cx uses width and cy uses height.
  cam_matrix = np.array([[focal_length, 0, img_w / 2],
                         [0, focal_length, img_h / 2],
                         [0, 0, 1]])
  dist_matrix = np.zeros((4, 1), dtype=np.float64)

  success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
  if not success:
    return None
  rmat, jac = cv2.Rodrigues(rot_vec)
  angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
  # NOTE(review): the decomposed angles are scaled by 360 before thresholding;
  # cv2.RQDecomp3x3 is documented as returning degrees, so confirm this
  # scaling (and the +/-30 thresholds tuned against it) is intended.
  x = angles[0] * 360
  y = angles[1] * 360
  z = angles[2] * 360

  # See where the user's head is tilting
  lookingForward = not (y < -30 or y > 30 or x < -30 or x > 30)

  ratio, eyeDirection = blinkRatio(image, mesh_coord, RIGHT_EYE, LEFT_EYE)
  eyesOpen = not (ratio > 4)

  return (lookingForward, x, y, z, eyesOpen, ratio, eyeDirection)
def findAllStraight(ims):
  '''
  Split images by the pose reported by findFacePose.

  Args:
    ims: iterable of image paths.

  Returns:
    (allStraight, acceptableFrames): acceptableFrames holds every image whose
    pose result is truthy with a truthy first element; allStraight is the
    subset whose element 4 is truthy and element 6 equals "Straight".
  '''
  allStraight, acceptableFrames = [], []
  for im in ims:
    pose = findFacePose(im)
    print(pose)
    # No usable pose, or the face is not looking forward: skip the image.
    if not pose or not pose[0]:
      continue
    acceptableFrames.append(im)
    eyes_open, direction = pose[4], pose[6]
    if eyes_open and direction == "Straight":
      allStraight.append(im)
      print("x = ", pose[1], ", y = ", pose[2])
  return allStraight, acceptableFrames
from sklearn import cluster
def generateClustersNew(files, dataset_path):
  '''
  Group face images by identity using InsightFace embeddings.

  For every ".jpg" path in files, the first face returned by app.get supplies
  a normed embedding. Embeddings are clustered with single-linkage
  agglomerative clustering (distance_threshold=1, cluster count not fixed).

  Args:
    files: iterable of image paths; non-".jpg" entries, unreadable images and
      images with no detected face are skipped.
    dataset_path: unused; kept so existing callers keep working.

  Returns:
    Dict mapping an integer cluster label to the list of file paths in that
    cluster. Empty when no usable face is found; a single usable file forms
    cluster 0 on its own.
  '''
  accepted_files = []
  features = []
  for file in files:
    if not file.endswith('.jpg'):
      continue

    img = cv2.imread(file)
    if img is None:
      # Unreadable or missing file: nothing to embed.
      continue

    faces = app.get(img)
    if not faces:
      continue
    features.append(faces[0].normed_embedding)
    accepted_files.append(file)

  # The clustering estimator needs at least two samples, so handle the
  # degenerate cases here instead of letting fit_predict raise.
  if not accepted_files:
    return {}
  if len(accepted_files) == 1:
    return {0: accepted_files}

  y_pred = cluster.AgglomerativeClustering(n_clusters=None, distance_threshold=1, linkage='single').fit_predict(features)

  clusterDict = {}
  for label, file in zip(y_pred, accepted_files):
    # Plain int keys; they hash and compare equal to the numpy labels.
    clusterDict.setdefault(int(label), []).append(file)
  return clusterDict

In [None]:
# Pipeline: sample video frames -> crop detected faces -> pad crops -> cluster.
save_path = "images/"
video_files = ["ninetyflownby.mp4"]
save_frames_from_videos(video_files, save_path, every_n_frames=15, debug=False)
# Read back from save_path (not a second hard-coded path). Sorting makes the
# order reproducible, and skipping "cropped_" files stops a re-run of this
# cell from re-cropping crops written by an earlier run.
allFrames = [
  os.path.join(save_path, filename)
  for filename in sorted(os.listdir(save_path))
  if filename.endswith('.jpg') and not filename.startswith('cropped_')
]
allCroppedFaces = getCroppedMediapipe(allFrames)
for face in allCroppedFaces:
  add_padding(face)
# Alternative: cluster only forward-facing, eyes-open crops.
#allStraight, acceptableFrames = findAllStraight(allCroppedFaces)
#clusters = generateClustersNew(allStraight, "./")
clusters = generateClustersNew(allCroppedFaces, "./")

In [None]:
from google.colab.patches import cv2_imshow
# Walk the clusters, printing each label followed by every member's path
# and rendering the member image inline.
for clusterID, members in clusters.items():
  print(clusterID)
  for img in members:
    print(img)
    cv2_imshow(cv2.imread(img))

In [None]:
# Inspect cluster 2; .get avoids a KeyError when fewer than three clusters exist.
print(clusters.get(2, []))

In [None]:
!pip install viztracer

In [None]:
!pip install vizviewer

In [None]:
!viztracer face_detect_full.py --source "i_think_you_should_leave_season_2_trailer.mp4"

In [None]:
!vizviewer /content/result.json
