In [26]:
%matplotlib notebook
%matplotlib inline
import math
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import matplotlib.animation as animation
import matplotlib.cm as cm
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from scipy.stats import skew, kurtosis
from sklearn.cluster import KMeans, DBSCAN
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams["image.cmap"] = 'hsv'
plt.rcParams["animation.embed_limit"] = 1866

In [2]:
def video_read(video_path):
    video = cv2.VideoCapture(video_path)
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    buf = np.empty((frame_count, frame_height, frame_width, 3), np.dtype('uint8'))
    fc = 0
    ret = True
    while fc < frame_count and ret:
        ret, frame = video.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            buf[fc] = frame
            fc += 1
    video.release()
    return fc, buf

In [3]:
def video_write(buf, out_path):
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (buf.shape[2], buf.shape[1]), isColor=False)
    buf = buf.astype('uint8')
    kernel = np.ones((5,5),np.uint8)
    for frame in buf:
        out.write(cv2.morphologyEx(frame, cv2.MORPH_CLOSE, kernel))
    out.release()

In [4]:
def refine_faces(faces, probability_threshold=0.35):
    faces = list(filter(lambda face: np.mean(face["probs"]) / 100 > probability_threshold, faces))
    def interp_array(arr):
        l = len(arr)
        fp = arr[arr > 0]
        xp = (arr > 0) * (np.arange(l) + 1) 
        xp = xp[xp > 0] - 1
        return np.interp(np.arange(l), xp, fp)
    for face in faces:
        face["centers_x"] = interp_array(np.array(face["centers_x"]))
        face["centers_y"] = interp_array(np.array(face["centers_y"]))
        face["widths"] = interp_array(np.array(face["widths"]))
        face["heights"] = interp_array(np.array(face["heights"]))
        face["probs"] = np.array(face["probs"])
    return faces

In [5]:
def crop_faces(video, faces_info, size=350):
    video_height = video.shape[1]
    video_width = video.shape[2]
    def top(y):
        return 0 if y < size / 2 else video_height - size if y + (size / 2) > video_height else int(y - (size / 2))
    def bottom(y):
        return size if y < size / 2 else video_height if y + (size / 2) > video_height else int(y + (size / 2))
    def left(x):
        return 0 if x < size / 2 else video_width - size if x + (size / 2) > video_width else int(x - (size / 2))
    def right(x):
        return size if x < size / 2 else video_width if x + (size / 2) > video_width else int(x + (size / 2))
    faces_crops = [[(frame, top(y), bottom(y), left(x), right(x))
             for frame, (x, y) in enumerate(zip(face["centers_x"], face["centers_y"]))]
             for face in faces_info]
    return np.stack(
        [np.stack(
            [video[frame, top:bottom, left:right, :] for frame, top, bottom, left, right in crops]
        ) for crops in faces_crops])

In [6]:
def read_metadata(path):
    metadata_file = open(path)
    metadata = json.load(metadata_file)
    metadata_file.close()
    return metadata

In [7]:
dir = "D:\\Projects\\DFDC\\Data"
metadata_file_name = 'metadata-processed.json'

metadata = read_metadata("{}\\{}".format(dir, metadata_file_name))

In [8]:
video_name = 'akfrnoqubc.mp4'
video_info = metadata[video_name]
video_sub_dir = video_info['dir']
if video_info['label'] == 'FAKE':
    fake_names = [video_name]
    real_name = video_info['original']
elif video_info['label'] == 'REAL':
    real_name = video_name
    fake_names = video_info['fakes']
else:
    real_name = video_name
    fake_names = []
real_info = metadata[real_name]
if 'faces' not in real_info:
    print("Not enough metadata")
    raise TypeError("Not enough metadata")
print("Original Video: {}".format(real_name))
print("Fake Videos: {}".format(fake_names))
fake_order = 0
fake_name = fake_names[fake_order]

Original Video: xbvjrriwxn.mp4
Fake Videos: ['akfrnoqubc.mp4']


In [9]:
real_fc, real_video = video_read("{}\\{}\\{}".format(dir, video_sub_dir, real_name))
fake_fc, fake_video = video_read("{}\\{}\\{}".format(dir, video_sub_dir, fake_name))
fc = min(real_fc, fake_fc)
real_video = real_video[:fc]
fake_video = fake_video[:fc]
print("Number of frames: {0}".format(fc))

Number of frames: 300


In [10]:
faces_info = real_info['faces']
print('Video has {} suspected faces. Refining faces...'.format(len(faces_info)))
faces_info = refine_faces(faces_info)
print('{} faces were refined'.format(len(faces_info)))

Video has 2 suspected faces. Refining faces...
2 faces were refined


In [11]:
real_faces = crop_faces(real_video, faces_info)
fake_faces = crop_faces(fake_video, faces_info)

In [12]:
diff = np.sqrt(np.sum(np.square(np.subtract(real_faces, fake_faces)), axis=4))

In [13]:
diff_min = np.min(diff)
print("Diff min: {0}".format(diff_min))
diff_max = np.max(diff)
print("Diff max: {0}".format(diff_max))

Diff min: 0.0
Diff max: 27.331300737432898


In [23]:
top_percentage = 2
threshold = np.percentile(diff, 100 - (top_percentage / diff.shape[0]))
print("Threshold: {0}".format(threshold))

Threshold: 18.384776310850235


In [15]:
# plt.hist(diff.flatten(), bins=range(math.ceil(diff_max)))
# plt.plot([threshold, threshold], [0, 1000000], linestyle='--', scalex=False)
# plt.show()

In [16]:
# gray_diff = (diff - diff_min) * (255 / (diff_max - diff_min))
# gray_threshold = (threshold - diff_min) * (255 / (diff_max - diff_min))

In [24]:
mask = diff > threshold
filtered_diff = diff * mask

In [18]:
# %%capture
# diff_ani = draw_videos([real_faces, filtered_diff, fake_faces], [cm.viridis, cm.gray, cm.viridis], ["Original", "Diff", "Fake"], fc)
# diff_ani = draw_videos([real_faces, filtered_diff], [cm.viridis, cm.gray], ["Original", "Diff"], fc)

In [19]:
# diff_ani

In [33]:
# X = np.stack([np.mean(filtered_diff, axis=(2, 3))], axis=2)
X = np.stack((np.mean(filtered_diff, axis=(2, 3)), np.std(filtered_diff, axis=(2,3))), axis=2)
# X = np.reshape(filtered_diff, (filtered_diff.shape[0], filtered_diff.shape[1], filtered_diff.shape[2] * filtered_diff.shape[3]))
print(X)
# kmeans = [KMeans(n_clusters=2, random_state=0).fit(x) for x in X]
# for kmean in kmeans:
#     print(kmean.inertia_)
#     print(kmean.labels_)
#     print(kmean.cluster_centers_)
#     print(kmean.score(kmean.cluster_centers_ - 1))
# wrong = kmeans[0].labels_ - kmeans[1].labels_
dbscans = [DBSCAN(eps=0.25, min_samples=1).fit(x) for x in X]
for dbscan in dbscans:
    print(dbscan.labels_)
wrong = dbscans[0].labels_ - dbscans[1].labels_
print(wrong)
wrong_index = []
for i, x in enumerate(wrong):
    if x != 0:
        wrong_index.append(i)
print(wrong_index)
for x, dbscan in zip(X, dbscans):
    print(dbscan.predict([x[index] for index in wrong_index]))
    print([dbscan.score([x[index]]) for index in wrong_index])

[[[0.01043368 0.46122874]
  [0.00961949 0.44303835]
  [0.02462668 0.71047387]
  ...
  [0.17433562 1.88921292]
  [0.18974124 1.9764817 ]
  [0.19416869 2.00309876]]

 [[0.66943944 3.68430951]
  [0.67035132 3.68691793]
  [0.69042937 3.7427886 ]
  ...
  [0.12238578 1.59401729]
  [0.0969571  1.42094943]
  [0.09559702 1.40663877]]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 0 0 0 0 0 0 1 1 0 0 1 1 1 1 0 1 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0

AttributeError: 'DBSCAN' object has no attribute 'predict'

In [21]:
# def video_write(buf, out_path):
#     out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (buf.shape[2], buf.shape[1]), isColor=False)
#     buf = buf.astype('uint8')
#     kernel = np.ones((2,2),np.uint8)
#     for frame in buf:
#         f = frame
# #         f = cv2.morphologyEx(cv2.morphologyEx(f, cv2.MORPH_CLOSE, kernel), cv2.MORPH_OPEN, kernel)
# #         f = cv2.morphologyEx(cv2.morphologyEx(f, cv2.MORPH_CLOSE, kernel), cv2.MORPH_OPEN, kernel)
# #         f = cv2.morphologyEx(cv2.morphologyEx(f, cv2.MORPH_CLOSE, kernel), cv2.MORPH_OPEN, kernel)
# #         f = cv2.morphologyEx(cv2.morphologyEx(f, cv2.MORPH_CLOSE, kernel), cv2.MORPH_OPEN, kernel)
#         out.write(f)
#     out.release()

# for i, face_diff in enumerate(filtered_diff):
#     out_path = "{}\\{}\\{}_face_diff_{}.mp4".format(dir, video_sub_dir, fake_name[:fake_name.find('.')], i)
#     print(out_path)
#     video_write(face_diff, out_path)

In [22]:
# def draw_videos(videos_cols, columns_color_maps, columns_titles, fc):
#     cols = len(videos_cols)
#     rows = max(map(lambda col: len(col), videos_cols))
#     fig, ax = plt.subplots(nrows=rows, ncols=cols, squeeze=False, figsize=(10 * cols, 10 * rows))
#     #TO CHECK
#     im = [[ax[j, i].imshow(video[0], cmap=columns_color_maps[i], animated=True)
#            for j, video in enumerate(col)] 
#           for i, col in enumerate(videos_cols)]      
#     for i, title in enumerate(columns_titles): ax[0][i].title.set_text(title)
#     def draw(frame_count):
#         # TO CHECK
#         [im[i][j].set_data(video[frame_count])
#          for i, col in enumerate(videos_cols)
#          for j, video in enumerate(col)]
#     ani = animation.FuncAnimation(fig, draw, interval=30, save_count=fc)
#     return ani