In [None]:
import os, sys
import git

FFMPEG = '/mmfs1/gscratch/cse/bandhav/miniconda3/envs/avhubert/bin/ffmpeg'
os.environ['PATH'] = f'{os.path.dirname(FFMPEG)}:' + os.environ['PATH']
!which ffmpeg
!which ffprobe

root = git.Repo('.', search_parent_directories=True).working_tree_dir
work_dir = os.path.join(root, 'modules', 'av_hubert', 'avhubert')
os.chdir(work_dir)
print(f"Changed working directory to {work_dir}")

In [None]:
import dlib, cv2, os
import numpy as np
import skvideo
import skvideo.io
from tqdm import tqdm
from preparation.align_mouth import landmarks_interpolate, crop_patch, write_video_ffmpeg
from IPython.display import HTML
from base64 import b64encode

def play_video(video_path, width=200):
    """Return an inline HTML5 player for a local video file.

    The file is read in full, base64-encoded and embedded as a ``data:`` URL,
    so the returned markup contains the entire video.

    Args:
        video_path: Path of the video file to embed. The content is always
            labelled with the ``video/mp4`` MIME type.
        width: Value used for the ``width`` attribute of the ``<video>`` tag.

    Returns:
        The ``HTML(...)`` object wrapping the ``<video>`` markup.

    Raises:
        OSError: If ``video_path`` cannot be opened or read.
    """
    # Context manager so the file handle is closed deterministically instead
    # of being left open until garbage collection.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
    <video width={width} controls>
        <source src="{data_url}" type="video/mp4">
    </video>
    """)

def detect_landmark(image, detector, predictor):
    """Locate 68 facial landmark points in a single RGB frame.

    The frame is converted to grayscale, ``detector`` is called on it (with
    ``1`` as its second argument — presumably dlib's upsampling count) and
    ``predictor`` is run on every region the detector reports.

    Args:
        image: RGB image array accepted by ``cv2.cvtColor``.
        detector: Callable ``detector(gray, 1)`` returning an iterable of
            face regions.
        predictor: Callable ``predictor(gray, region)`` returning an object
            whose ``part(i)`` exposes ``x`` and ``y`` for ``i`` in 0..67.

    Returns:
        A ``(68, 2)`` ``int32`` array of ``(x, y)`` coordinates for the last
        region reported by the detector, or ``None`` when no region is found.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    landmarks = None
    for face_region in detector(gray, 1):
        fitted = predictor(gray, face_region)
        # Each later face overwrites the previous result, so only the final
        # detection survives the loop.
        landmarks = np.array(
            [(fitted.part(idx).x, fitted.part(idx).y) for idx in range(68)],
            dtype=np.int32,
        )
    return landmarks

def preprocess_video(input_video_path, output_video_path, face_predictor_path, mean_face_path,
                     ffmpeg_path=None):
    """Crop the mouth region out of a video and write it as a new video.

    Every frame is run through ``detect_landmark``; the per-frame landmarks
    are passed to ``landmarks_interpolate``, then ``crop_patch`` produces the
    region-of-interest frames, which ``write_video_ffmpeg`` encodes.

    Args:
        input_video_path: Video to read (decoded in full with
            ``skvideo.io.vread``).
        output_video_path: Destination of the cropped video.
        face_predictor_path: Model file loaded by ``dlib.shape_predictor``.
        mean_face_path: ``.npy`` file with the mean-face landmarks passed to
            ``crop_patch``.
        ffmpeg_path: ffmpeg binary handed to ``write_video_ffmpeg``. Defaults
            to the notebook-level ``FFMPEG`` constant.

    Returns:
        None. The result is the file written at ``output_video_path``.

    Raises:
        ValueError: If no landmarks were detected in any frame.
    """
    # Resolve the binary up front so a missing FFMPEG global fails before the
    # expensive per-frame detection rather than after it.
    ffmpeg_bin = FFMPEG if ffmpeg_path is None else ffmpeg_path

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(face_predictor_path)
    STD_SIZE = (256, 256)
    mean_face_landmarks = np.load(mean_face_path)
    # Landmark indices passed to crop_patch as its stable reference points.
    stablePntsIDs = [33, 36, 39, 42, 45]

    # vread already returns the decoded frames as one array; iterate it
    # directly instead of copying every frame into a second array.
    frames = skvideo.io.vread(input_video_path)
    landmarks = [detect_landmark(frame, detector, predictor) for frame in tqdm(frames)]

    # detect_landmark yields None for frames without a detected face. If that
    # is every frame there is nothing to interpolate from, so fail with a
    # clear message instead of an obscure downstream error.
    if all(frame_landmarks is None for frame_landmarks in landmarks):
        raise ValueError(f"No face landmarks detected in any frame of {input_video_path}")

    preprocessed_landmarks = landmarks_interpolate(landmarks)
    # start_idx/stop_idx select landmarks 48..67 — presumably the mouth points
    # of the 68-point scheme; confirm against crop_patch.
    rois = crop_patch(input_video_path, preprocessed_landmarks, mean_face_landmarks, stablePntsIDs, STD_SIZE,
                      window_margin=12, start_idx=48, stop_idx=68, crop_height=96, crop_width=96)
    write_video_ffmpeg(rois, output_video_path, ffmpeg_bin)

# Preview the source clip. The path is built from the repository root (set in
# the first cell) rather than a user-specific absolute path; it is the same
# file the preprocessing cell reads as its input clip.
play_video(f"{root}/data/misc/avhubert_demo_video_8s.mp4", width=300)

In [None]:
# Inputs, all located under the repository root (`root` is set in the first
# cell): the landmark-predictor model file, the mean-face landmarks array and
# the demo clip to process.
face_predictor_path = f"{root}/data/misc/shape_predictor_68_face_landmarks.dat"
mean_face_path = f"{root}/data/misc/20words_mean_face.npy"
origin_clip_path = f"{root}/data/misc/avhubert_demo_video_8s.mp4"
# Output: where the cropped mouth-region video is written.
mouth_roi_path = f"{root}/data/roi.mp4"
preprocess_video(origin_clip_path, mouth_roi_path, face_predictor_path, mean_face_path)
# Last expression of the cell, so the returned player is rendered as its output.
play_video(mouth_roi_path)