#!/usr/bin/env python3 """ Process images from a video with linear motion for a visualisation of jittering Usage example: * Extract video frames with ffmpeg: `mkdir in && ffmpeg -i in.mp4 "in/img_%d.png"` * Figure out the motion vector: Open two images from in/ with GIMP, overlay them and move them to find the motion distance in pixels, and then divide the motion distance by the number of frames the to images are apart. * Execute this script to move every frame by a multiple of the motion: `./visualize_jitter.py in/ --motion -13.13 0 out/` * Convert the output images back into a video with ffmpeg: `ffmpeg -f image2 -framerate 20 -i out/img_%d.png out.mp4` """ import argparse import numpy as np from PIL import Image from pathlib import Path import warnings def sRGB_to_linear(arr): """ sRGB EOTF; input and output are in [0,1] """ return np.where(arr <= 0.04045, arr / 12.92, ((arr + 0.055) / 1.055) ** 2.4) def linear_to_sRGB(arr_lin): """ sRGB OETF; input is in [0,1] and output in {0,1,…,255}; out-of-bounds values are clipped """ with warnings.catch_warnings(): warnings.filterwarnings("ignore", "invalid value encountered in power") arr = np.where( arr_lin <= 0.0031308, 12.92 * arr_lin, 1.055 * arr_lin ** (1.0 / 2.4) - 0.055, ) return np.uint8(np.rint(arr * 255.0).clip(0, 255)) def load_image(image_file): """ Load an image file into a numpy array Args: input_image (str or Path): Path to the input image file Returns: np.array: Image data with shape (height, width, 3), where width and height is the image resolution and 3 is for the RGB channels. The data type is float32 and values are in [0,1]. """ image = np.array( Image.open(image_file).convert("RGB"), dtype=np.float32 ) * (1.0 / 255.0) return sRGB_to_linear(image) def save_image(image, output_path): """ Save image data to an image file Args: image (np.array): Image data in the same format as load_image's output output_path (str or Path): Path where the image is saved """ image = linear_to_sRGB(image) print(f"Saving image {output_path}") Image.fromarray(image, mode="RGB").save(output_path) def estimate_motion(input_folder, frame_1, frame_2): """ Search for a horizontal motion value using two frames Args: input_folder (str): Path to the input images frame_1 (int): First frame index frame_2 (int): Second frame index Returns: float: Estimated motion in X direction """ img1 = load_image(Path(input_folder) / f"img_{frame_1}.png") img2 = load_image(Path(input_folder) / f"img_{frame_2}.png") if img1.shape != img2.shape: raise RuntimeError("Images have different resolution") third_width = img1.shape[1] // 3 print(f"Searching {img1.shape[1]} pixel offsets for a motion estimation") # ~ img1 = img1 + np.roll(img1, 1, axis=1) # ~ img1 = img1 + np.roll(img1, 2, axis=1) # ~ img2 = img2 + np.roll(img2, 1, axis=1) # ~ img2 = img2 + np.roll(img2, 2, axis=1) best_roll = 0 best_err = np.inf for i in range(img1.shape[1]): offset = -i moved = np.roll(img2, offset, axis=1) diff = moved - img1 diff = diff[:,third_width:2*third_width:,:] err = np.reshape( diff, [diff.shape[0] * diff.shape[1] * diff.shape[2]] ) err = np.dot(err, err) if err < best_err: print(f"offset: {offset}, err: {err}") best_roll = offset best_err = err print(frame_2 - frame_1) found_roll = best_roll / (frame_2 - frame_1) print(f"Estimated pixel offset per frame: {found_roll}") return found_roll def frame_id_from_path(path): """ Determine which frame an image belongs to from its file path Args: path (Path): Path to the image file Returns: int: Frame index """ return int(path.stem.removeprefix("img_")) def roll_images(input_folder, output_folder, motion, difference): """ Move frames in the opposite direction of `motion` and save the result Args: input_folder (str): Path to the input images output_folder (str): Path where the processed images are saved motion (np.array): 2D float32 vector defining the linear movement of the input images difference (bool): If enabled, calculate the difference between successive frames """ image_files = list( sorted(Path(input_folder).glob("img_*.png"), key=frame_id_from_path) ) # ~ image_files = list(Path(input_folder).glob("img_*.png")) print(f"Processing {len(image_files)} image files") img_prev = None for image_path in Path(input_folder).glob("img_*.png"): offset = frame_id_from_path(image_path) * (-motion) img = load_image(image_path) img = np.roll(img, int(offset[0] + 0.5), axis=1) img = np.roll(img, int(offset[1] + 0.5), axis=0) if difference and img_prev is not None: diff_img = (img - img_prev) * 0.5 + 0.5 # ~ diff_img = np.abs(img - img_prev) save_image(diff_img, Path(output_folder) / image_path.name) else: save_image(img, Path(output_folder) / image_path.name) img_prev = img def parse_arguments(): """Read command line arguments""" parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "input_folder", help="Folder with images extracted from a video in the format " "img_.png. " ) parser.add_argument("output_folder", help="Folder to save generated images") motion_arg_group = parser.add_mutually_exclusive_group(required=True) motion_arg_group.add_argument( "--motion-frames", help="First and second frame index used for motion estimation. " "Motion estimation does not work right yet.", type=int, nargs=2, ) motion_arg_group.add_argument( "--motion", help="Horizontal and vertical motion", type=float, nargs=2, ) parser.add_argument( "--show-difference", help="Show the difference between frames in addition to moving them", action="store_true", ) return parser.parse_args() def main(): args = parse_arguments() input_folder = args.input_folder output_folder = args.output_folder if args.motion_frames: motion = estimate_motion( input_folder, args.motion_frames[0], args.motion_frames[1] ) motion = [motion, 0.0] elif args.motion: motion = args.motion motion = np.array(motion, dtype=np.float32) roll_images(input_folder, output_folder, motion, args.show_difference) if __name__ == "__main__": try: main() except KeyboardInterrupt: pass