In [1]:
import pandas as pd
import cv2
import PIL
import os
from IPython.display import Image, Video, display
from matplotlib import pyplot as plt
import glob
from frame_iterator import FramesIterator
from tqdm import tqdm

In [2]:
path = 'exp2'
filenames = glob.glob(path + "/*.csv")

dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))

# Concatenate all data into one DataFrame
df = pd.concat(dfs, ignore_index=True)

In [3]:
df.head()

Unnamed: 0,directory,video,frame_number,x1,y1,x2,y2,conf,class
0,2022Y02M05D11H,E211M00S60.mp4,408,173.388733,138.242737,217.847488,190.133026,0.561631,4
1,2022Y02M05D11H,E211M00S60.mp4,409,182.842987,133.724335,287.760437,331.947083,0.723904,0
2,2022Y02M05D11H,E211M00S60.mp4,409,174.868134,138.632584,216.73613,186.797699,0.52117,4
3,2022Y02M05D11H,E211M00S60.mp4,410,184.850616,134.370865,286.427551,332.733948,0.713885,0
4,2022Y02M05D11H,E211M00S60.mp4,410,227.105377,135.248383,278.494293,190.346786,0.509522,4


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1065495 entries, 0 to 1065494
Data columns (total 9 columns):
 #   Column        Non-Null Count    Dtype  
---  ------        --------------    -----  
 0   directory     1065495 non-null  object 
 1   video         1065495 non-null  object 
 2   frame_number  1065495 non-null  int64  
 3   x1            1065495 non-null  float64
 4   y1            1065495 non-null  float64
 5   x2            1065495 non-null  float64
 6   y2            1065495 non-null  float64
 7   conf          1065495 non-null  float64
 8   class         1065495 non-null  int64  
dtypes: float64(5), int64(2), object(2)
memory usage: 73.2+ MB


In [5]:
df['class'].value_counts()
# 4 - body, 0 - head, 1 - face
# ['body', 'face', 'car', 'bus', 'head', 'motorcycle', 'plate', 'truck']

0    597872
4    425643
1     40592
2      1135
7       162
3        86
5         3
6         2
Name: class, dtype: int64

In [6]:
df = df[(df['class'] == 0) | (df['class'] == 1 ) | (df['class'] == 4)]

In [7]:
def imshow(img):
    _,ret = cv2.imencode('.jpg', img) 
    i = Image(data=ret)
    display(i)

In [8]:
color = (255,255,0)
poster = [800, 0, 1400, 350] # poster
box2 = [1000, 130, 1120, 270] # face
# box2 = [1120, 308, 1200, 400] 
# box2 = [169, 540, 575, 1025]

In [9]:
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    coordsB = [(boxB[0], boxB[1]), (boxB[2], boxB[1]), (boxB[0], boxB[3]), (boxB[2], boxB[3])]
    # проверка на пересечение боксов
    for coord in coordsB:
        if coord[0] > boxA[0] and coord[0] < boxA[2] and coord[1] > boxA[1] and coord[1] < boxA[3]:
            xA = max(boxA[0], boxB[0])
            yA = max(boxA[1], boxB[1])
            xB = min(boxA[2], boxB[2])
            yB = min(boxA[3], boxB[3])

            # compute the area of intersection rectangle
            interArea = (xB - xA) * (yB - yA)

            # compute the area of both the prediction and ground-truth
            # rectangles
            boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
            boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])


            # compute the intersection over union by taking the intersection
            # area and dividing it by the sum of prediction + ground-truth
            # areas - the interesection area
            # iou = interArea / float(boxAArea+boxBArea - interArea)

            iou = interArea / float(boxBArea )
            # return the intersection over union value
            return iou
    return 0

In [10]:
bb_intersection_over_union(poster, box2)

1.0

In [11]:
df['iou_poster'] = df.apply (lambda row: bb_intersection_over_union(poster, row.iloc[3:7]), axis=1)

In [12]:
df['path_video'] = df.apply(lambda row: row['directory']+'/'+row['video'], axis=1)

In [13]:
df[df['iou_poster']==0].groupby('path_video').count()

Unnamed: 0_level_0,directory,video,frame_number,x1,y1,x2,y2,conf,class,iou_poster
path_video,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022Y02M03D07H/E228M00S60.mp4,652,652,652,652,652,652,652,652,652,652
2022Y02M04D15H/E232M00S60.mp4,270,270,270,270,270,270,270,270,270,270
2022Y02M05D06H/E253M00S60.mp4,4536,4536,4536,4536,4536,4536,4536,4536,4536,4536
2022Y02M05D06H/E255M00S60.mp4,6036,6036,6036,6036,6036,6036,6036,6036,6036,6036
2022Y02M05D06H/E256M00S60.mp4,5324,5324,5324,5324,5324,5324,5324,5324,5324,5324
...,...,...,...,...,...,...,...,...,...,...
2022Y02M08D08H/E248M00S60.mp4,2123,2123,2123,2123,2123,2123,2123,2123,2123,2123
2022Y02M08D09H/E214M00S60.mp4,583,583,583,583,583,583,583,583,583,583
2022Y02M08D09H/E216M00S60.mp4,551,551,551,551,551,551,551,551,551,551
2022Y02M08D09H/E218M00S60.mp4,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308


In [14]:
df_objects_counts = pd.DataFrame(df[(df['iou_poster']==0) & (df['conf']>0.75)].groupby('path_video').size(), columns=['objects_count'])
df_objects_counts

Unnamed: 0_level_0,objects_count
path_video,Unnamed: 1_level_1
2022Y02M03D07H/E228M00S60.mp4,363
2022Y02M04D15H/E232M00S60.mp4,148
2022Y02M05D06H/E253M00S60.mp4,2245
2022Y02M05D06H/E255M00S60.mp4,3724
2022Y02M05D06H/E256M00S60.mp4,2488
...,...
2022Y02M08D08H/E248M00S60.mp4,405
2022Y02M08D09H/E214M00S60.mp4,153
2022Y02M08D09H/E216M00S60.mp4,241
2022Y02M08D09H/E218M00S60.mp4,686


In [15]:
list_videos = df_objects_counts.index.tolist()

In [16]:
# with open('video_list.txt', 'w', encoding='UTF8') as f:
#     f.write('\n'.join(list_videos))

# Анализ голов и лиц

In [17]:
df_faces = df[(df['class']==1) & (df['conf'] > 0.65) & (df['iou_poster']==0)].copy()
# df_faces[df_faces['path_video']=='2022Y02M05D11H/E206M00S60.mp4']
df_faces

Unnamed: 0,directory,video,frame_number,x1,y1,x2,y2,conf,class,iou_poster,path_video
1072,2022Y02M05D11H,E211M00S60.mp4,894,389.792480,297.829437,442.637268,362.736237,0.671274,1,0.0,2022Y02M05D11H/E211M00S60.mp4
1098,2022Y02M05D11H,E211M00S60.mp4,900,367.756836,389.660767,422.666016,452.990417,0.669631,1,0.0,2022Y02M05D11H/E211M00S60.mp4
1102,2022Y02M05D11H,E211M00S60.mp4,901,358.674805,411.312988,426.198303,484.160400,0.669080,1,0.0,2022Y02M05D11H/E211M00S60.mp4
1112,2022Y02M05D11H,E211M00S60.mp4,904,333.785767,468.894562,403.277618,536.024963,0.785275,1,0.0,2022Y02M05D11H/E211M00S60.mp4
1122,2022Y02M05D11H,E211M00S60.mp4,906,324.436829,499.790771,397.812500,561.428467,0.793562,1,0.0,2022Y02M05D11H/E211M00S60.mp4
...,...,...,...,...,...,...,...,...,...,...,...
1064508,2022Y02M07D11H,E247M00S60.mp4,1052,374.855560,311.564728,436.621704,371.210449,0.768851,1,0.0,2022Y02M07D11H/E247M00S60.mp4
1064511,2022Y02M07D11H,E247M00S60.mp4,1053,377.404938,312.044037,440.138733,369.678589,0.727643,1,0.0,2022Y02M07D11H/E247M00S60.mp4
1064514,2022Y02M07D11H,E247M00S60.mp4,1054,379.794220,307.659088,445.169678,370.563446,0.760157,1,0.0,2022Y02M07D11H/E247M00S60.mp4
1064517,2022Y02M07D11H,E247M00S60.mp4,1055,384.033508,306.930084,449.133453,371.249176,0.753651,1,0.0,2022Y02M07D11H/E247M00S60.mp4


In [18]:
df_faces['box_square'] = df_faces.apply(lambda row: (row.x2-row.x1)*(row.y2-row.y1), axis=1)

In [19]:
df_faces.sort_values(by='box_square', ascending=False, inplace = True)
df_faces

Unnamed: 0,directory,video,frame_number,x1,y1,x2,y2,conf,class,iou_poster,path_video,box_square
65500,2022Y02M08D09H,E220M00S60.mp4,0,1321.211670,0.000000,1857.300049,570.485046,0.673629,1,0.0,2022Y02M08D09H/E220M00S60.mp4,305830.403708
66020,2022Y02M08D09H,E220M00S60.mp4,109,1253.711670,0.000000,1749.082031,537.779785,0.664065,1,0.0,2022Y02M08D09H/E220M00S60.mp4,266400.166488
69071,2022Y02M08D09H,E218M00S60.mp4,622,1535.029785,394.003143,1879.725098,885.271240,0.795332,1,0.0,2022Y02M08D09H/E218M00S60.mp4,169337.810190
69229,2022Y02M08D09H,E218M00S60.mp4,727,1381.337646,424.910095,1770.088379,860.244507,0.785547,1,0.0,2022Y02M08D09H/E218M00S60.mp4,169236.571366
69233,2022Y02M08D09H,E218M00S60.mp4,728,1357.865723,405.583008,1730.461914,856.210510,0.708954,1,0.0,2022Y02M08D09H/E218M00S60.mp4,167902.091153
...,...,...,...,...,...,...,...,...,...,...,...,...
724967,2022Y02M07D16H,E233M00S60.mp4,229,431.785461,169.751770,473.072449,217.298126,0.664797,1,0.0,2022Y02M07D16H/E233M00S60.mp4,1963.045805
697811,2022Y02M07D10H,E242M00S60.mp4,1025,383.722260,329.184814,425.192413,376.025757,0.755635,1,0.0,2022Y02M07D10H/E242M00S60.mp4,1942.501085
63664,2022Y02M05D09H,E216M00S60.mp4,780,962.306519,454.504669,1001.557861,502.924530,0.653300,1,0.0,2022Y02M05D09H/E216M00S60.mp4,1900.544555
902380,2022Y02M07D08H,E202M00S60.mp4,321,722.784424,650.806396,754.794556,709.480591,0.682946,1,0.0,2022Y02M07D08H/E202M00S60.mp4,1878.168696


In [20]:
df_faces.sort_values(by='conf', ascending=False)

Unnamed: 0,directory,video,frame_number,x1,y1,x2,y2,conf,class,iou_poster,path_video,box_square
125404,2022Y02M05D13H,E232M00S60.mp4,873,417.231903,684.789062,511.624695,794.251099,0.886283,1,0.0,2022Y02M05D13H/E232M00S60.mp4,10332.427181
129751,2022Y02M07D20H,E229M00S60.mp4,375,558.647217,775.291626,662.501831,892.418884,0.879011,1,0.0,2022Y02M07D20H/E229M00S60.mp4,12164.206230
125401,2022Y02M05D13H,E232M00S60.mp4,872,420.375549,685.086670,514.959778,791.947815,0.875940,1,0.0,2022Y02M05D13H/E232M00S60.mp4,10107.378960
529142,2022Y02M07D13H,E233M00S60.mp4,49,467.112976,471.080078,540.676819,537.513733,0.872307,1,0.0,2022Y02M07D13H/E233M00S60.mp4,4887.114935
126189,2022Y02M05D13H,E232M00S60.mp4,984,540.149536,528.254944,622.790894,606.548279,0.871044,1,0.0,2022Y02M05D13H/E232M00S60.mp4,6470.267478
...,...,...,...,...,...,...,...,...,...,...,...,...
628304,2022Y02M07D15H,E241M00S60.mp4,598,444.485321,665.849609,524.296509,738.705444,0.650056,1,0.0,2022Y02M07D15H/E241M00S60.mp4,5814.710722
418352,2022Y02M05D07H,E232M00S60.mp4,1053,774.515747,483.589111,849.948364,561.042908,0.650034,1,0.0,2022Y02M05D07H/E232M00S60.mp4,5842.542573
500072,2022Y02M07D08H,E220M00S60.mp4,774,294.379456,382.519318,369.472931,477.310638,0.650015,1,0.0,2022Y02M07D08H/E220M00S60.mp4,7118.209711
634315,2022Y02M07D15H,E245M00S60.mp4,999,509.810120,386.536499,575.573181,451.910217,0.650012,1,0.0,2022Y02M07D15H/E245M00S60.mp4,4299.175856


In [21]:
def plot_image(directory, video, frame_num, plot=True):
    path_video = os.path.join('records_callcenter',directory, video)
    # print(os.path.join(directory, video), 'frame_num:', frame_num)
    # fr_iterator = FramesIterator(path_video, span_sec=0)
    # for frame, cur_frame,  frame_time in fr_iterator:
    #     if cur_frame == frame_num:
    #         if plot:
    #             imshow(frame)
    #         return frame
    cap = cv2.VideoCapture(path_video)
    cap.set(1,frame_num)
    succes, frame = cap.read()
    if plot:
        imshow(frame)
    cap.release()
    return frame

In [22]:
df_faces.sort_values(by='box_square', ascending=False).iloc[:10]

Unnamed: 0,directory,video,frame_number,x1,y1,x2,y2,conf,class,iou_poster,path_video,box_square
65500,2022Y02M08D09H,E220M00S60.mp4,0,1321.21167,0.0,1857.300049,570.485046,0.673629,1,0.0,2022Y02M08D09H/E220M00S60.mp4,305830.403708
66020,2022Y02M08D09H,E220M00S60.mp4,109,1253.71167,0.0,1749.082031,537.779785,0.664065,1,0.0,2022Y02M08D09H/E220M00S60.mp4,266400.166488
69071,2022Y02M08D09H,E218M00S60.mp4,622,1535.029785,394.003143,1879.725098,885.27124,0.795332,1,0.0,2022Y02M08D09H/E218M00S60.mp4,169337.81019
69229,2022Y02M08D09H,E218M00S60.mp4,727,1381.337646,424.910095,1770.088379,860.244507,0.785547,1,0.0,2022Y02M08D09H/E218M00S60.mp4,169236.571366
69233,2022Y02M08D09H,E218M00S60.mp4,728,1357.865723,405.583008,1730.461914,856.21051,0.708954,1,0.0,2022Y02M08D09H/E218M00S60.mp4,167902.091153
69241,2022Y02M08D09H,E218M00S60.mp4,730,1295.743774,378.701752,1658.19397,839.923828,0.847138,1,0.0,2022Y02M08D09H/E218M00S60.mp4,167170.031679
69060,2022Y02M08D09H,E218M00S60.mp4,619,1426.680664,370.43103,1786.771484,827.604309,0.842886,1,0.0,2022Y02M08D09H/E218M00S60.mp4,164623.900991
69245,2022Y02M08D09H,E218M00S60.mp4,731,1268.054565,374.181946,1631.150146,827.376526,0.820407,1,0.0,2022Y02M08D09H/E218M00S60.mp4,164552.949384
69236,2022Y02M08D09H,E218M00S60.mp4,729,1324.201416,397.776917,1689.875488,840.542908,0.842769,1,0.0,2022Y02M08D09H/E218M00S60.mp4,161908.043067
69074,2022Y02M08D09H,E218M00S60.mp4,623,1569.350098,417.976318,1906.824829,884.753723,0.802701,1,0.0,2022Y02M08D09H/E218M00S60.mp4,157525.579325


In [23]:
# # max box square in video
# df_faces['count_max_box_square'] = df_faces.groupby(['path_video'])['box_square'].transform(max)
# df_faces

In [24]:
# videos_with_all_faces = df_faces.path_video.tolist()
# videos_with_faces = []
# [videos_with_faces.append(x) for x in videos_with_all_faces if x not in videos_with_faces]
# with open('videos_with_faces.txt', 'w', encoding='UTF8') as f:
#     f.write('\n'.join(videos_with_faces))

In [25]:
# df_faces = df_faces[df_faces['count_max_box_square'] == df_faces['box_square']]
# df_faces

In [26]:
# df_faces.drop(columns=['count_max_box_square'], axis=1, inplace=True)
# df_faces

In [67]:
# ! mkdir output_faces_images
# ! mkdir output_faces_images/records_callcenter

In [68]:
# for ind, row in tqdm(df_faces.iterrows(), total=df_faces.shape[0]):
#     video_path = os.path.join('records_callcenter', row[0], row[1])
#     fr_iterator = FramesIterator(video_path, span_sec=1)
#     for frame, curr_frame, frame_time in fr_iterator:
#         if curr_frame == int(row[2]):
#             print(ind, row)
#             start_point = (int(row[3]), int(row[4]))
#             end_point = (int(row[5]), int(row[6]))
#             # cv2.rectangle(frame, start_point, end_point, color, 2)
#             # if not os.path.exists(os.path.join('output_faces_images/records_callcenter',row[0])):
#             #     os.mkdir(os.path.join('output_faces_images/records_callcenter',row[0]))
#             cv2.imwrite(os.path.join('output_faces_images/records_callcenter',row[0]+'_'+row[1] + '_frame_num' + str(curr_frame) + '.png'), frame)
#             # print(os.path.join('output_faces_images/records_callcenter',row[0]+'_'+row[1][:-4] + '_frame_num' + str(curr_frame) + '.png'))
#             continue
#     # cv2.rectangle(frame, start_point, end_point, color, 2)

In [149]:
import cv2
import mediapipe as mp
import numpy as np
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils
dist_threshold = 30


def head_rotation(directory,video,frame_num,x1,y1,x2,y2):
    
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
    image = plot_image(directory,video,frame_num, False)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    x1_new, y1_new, x2_new, y2_new = x1 * 0.9, y1 * 0.9, x2 * 1.1, y2 * 1.1
    crop_img = image[int(y1_new): int(y2_new),int(x1_new):int(x2_new),:]
    image = cv2.resize(crop_img, (272,272), interpolation = cv2.INTER_CUBIC)

    # To improve performance
    image.flags.writeable = False

    results = face_mesh.process(image)

    # To improve performance
    image.flags.writeable = True

    # Convert the color space from RGB to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    img_h, img_w, img_c = image.shape
    face_3d = []
    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)

                    x, y = int(lm.x * img_w), int(lm.y * img_h)

                    # Get the 2D Coordinates
                    face_2d.append([x, y])

                    # Get the 3D Coordinates
                    face_3d.append([x, y, lm.z])
                if idx == 123 :
                    left_ear =  (lm.x * img_w, lm.y * img_h)
                elif idx == 352:
                    right_ear = (lm.x * img_w, lm.y * img_h)
                elif idx == 377:
                    chin = (lm.x * img_w, lm.y * img_h)
                elif idx == 10:
                    forehead = (lm.x * img_w, lm.y * img_h)
            

            # Convert it to the NumPy array
            face_2d = np.array(face_2d, dtype=np.float64)

            # Convert it to the NumPy array
            face_3d = np.array(face_3d, dtype=np.float64)

            # The camera matrix
            focal_length = 1 * img_w

            cam_matrix = np.array([ [focal_length, 0, img_h / 2],
                                    [0, focal_length, img_w / 2],
                                    [0, 0, 1]])

            # The distortion parameters
            dist_matrix = np.zeros((4, 1), dtype=np.float64)

            # Solve PnP
            success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)

            # Get rotational matrix
            rmat, jac = cv2.Rodrigues(rot_vec)

            # Get angles
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

            # Get the y rotation degree
            x = angles[0] * 360
            y = angles[1] * 360
            z = angles[2] * 360

            # See where the user's head tilting
            # if y < -20:
            #     text = "Left"
            # elif y > 20:
            #     text = "Right"
            # elif x < -20:
            #     text = "Down"
            # elif x > 20:
            #     text = "Up"
            # else:
            #     text = "Forward"
            if nose_2d[0] - left_ear[0] > dist_threshold and right_ear[0] - nose_2d[0] > dist_threshold and \
                    (forehead[1]-nose_2d[1]) / (nose_2d[1] - chin[1]) < 2:
                # print('nose:',nose_2d)
                # print('ears:',left_ear, right_ear)
                # print('chin:', chin)
                # print('forehead:', forehead)
                # print((forehead[1]-nose_2d[1]) / (nose_2d[1] - chin[1]))

            # Display the nose direction
                nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)

                p1 = (int(nose_2d[0]), int(nose_2d[1]))
                p2 = (int(nose_2d[0] + y * 10) , int(nose_2d[1] - x * 10))

                # cv2.circle(image, p1,1, (0, 0, 255), 2)
                # cv2.circle(image, p2,1, (0, 255, 0), 2)

                # cv2.line(image, p1, p2, (255, 0, 0), 3)

                # Add the text on the image
                # cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                # cv2.putText(image, "x: " + str(np.round(x,2)), (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                # cv2.putText(image, "y: " + str(np.round(y,2)), (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                # cv2.putText(image, "z: " + str(np.round(z,2)), (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # mp_drawing.draw_landmarks(
                #             image=image,
                #             landmark_list=face_landmarks,
                #             connections=mp_face_mesh.FACEMESH_CONTOURS,
                #             landmark_drawing_spec=drawing_spec,
                #             connection_drawing_spec=drawing_spec)
                image_name = os.path.join('output_faces', '_'.join(map(str, [directory,video[:-4],frame_num,'.png'])))
                # cv2.imwrite(image_name, image)
                imshow(image)

In [None]:
for index, row in tqdm(df_faces.sort_values(by='box_square', ascending=False).iterrows(), total=df_faces.shape[0]):
    # plot_image(row.directory, row.video, row.frame_number)
    head_rotation(row.directory,row.video,frame_num=row.frame_number,x1=row.x1,y1=row.y1,x2=row.x2,y2=row.y2)