In [1]:
# Warning: a LaTeX installation is required (Matplotlib is configured with text.usetex = True).

[![Binder](https://mybinder.org/badge_logo.svg)](https://nbviewer.org/github/vicente-gonzalez-ruiz/motion_estimation/blob/main/src/motion_estimation/full_search_block_ME.ipynb)

[![Colab](https://badgen.net/badge/Launch/on%20Google%20Colab/blue?icon=notebook)](https://colab.research.google.com/github/vicente-gonzalez-ruiz/motion_estimation/blob/main/src/motion_estimation/full_search_block_ME.ipynb)

# Full search block-based ME (Motion Estimation)
The predicted frame is divided into blocks, and a motion vector is found for each block by exhaustive search. This guarantees that the global optimum (the best motion field within the search area) is reached.

In [2]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    !apt update
    !apt install imagemagick
    !apt install cm-super
    !apt install dvipng
    !apt install bc
    !apt install texlive-latex-extra
    !apt install texlive-fonts-recommended

In [3]:
# Print the path of the Python interpreter found first on the shell's PATH.
!which python

/shared/PEs/opencv-cuda/bin/python


In [4]:
!pip install --upgrade pip



In [5]:
try:
    import numpy as np
except:
    !pip install numpy
    import numpy as np

In [6]:
try:
    from scipy import ndimage
except:
    !pip install scipy
    from scipy import ndimage

In [7]:
try:
    import matplotlib
    import matplotlib.pyplot as plt
    import matplotlib.axes as ax
    import pylab
except:
    !pip install matplotlib
    import matplotlib
    import matplotlib.pyplot as plt
    import matplotlib.axes as ax
    import pylab

plt.rcParams["text.usetex"] = True
plt.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}" #for \text command
   
%matplotlib inline

Collecting matplotlib
  Downloading matplotlib-3.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.48.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (158 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.9/158.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m[31m4.5 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.4 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.1.1-py3-none-any.whl.metadata (5.1 kB)
Downloading ma

In [8]:
try:
    from color_transforms import YCoCg
except:
    !pip install "color_transforms @ git+https://github.com/vicente-gonzalez-ruiz/color_transforms"
    from color_transforms import YCoCg

Collecting color_transforms@ git+https://github.com/vicente-gonzalez-ruiz/color_transforms
  Cloning https://github.com/vicente-gonzalez-ruiz/color_transforms to /tmp/pip-install-au9m3l0r/color-transforms_949557e5e4fb4e008eaf7dafd17a5bdc
  Running command git clone --filter=blob:none --quiet https://github.com/vicente-gonzalez-ruiz/color_transforms /tmp/pip-install-au9m3l0r/color-transforms_949557e5e4fb4e008eaf7dafd17a5bdc
  Resolved https://github.com/vicente-gonzalez-ruiz/color_transforms to commit 8dcb71d015db080c1053ea654f528e5ebac32122
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: color_transforms
  Building wheel for color_transforms (pyproject.toml) ... [?25ldone
[?25h  Created wheel for color_transforms: filename=color_transforms-0.0.1-py3-none-any.whl size=3926 sha256=0c9a82f82669f5e9eaab66262041e98bcfd6728131a17

In [9]:
try:
    from image_IO import image_1 as gray_image
    from image_IO import image_3 as RGB_image
except:
    !pip install "image_IO @ git+https://github.com/vicente-gonzalez-ruiz/image_IO"
    from image_IO import image_1 as gray_image
    from image_IO import image_3 as RGB_image

Collecting image_IO@ git+https://github.com/vicente-gonzalez-ruiz/image_IO
  Cloning https://github.com/vicente-gonzalez-ruiz/image_IO to /tmp/pip-install-6kpeid6_/image-io_cae3918aca1243ebbdb0a57618b716ca
  Running command git clone --filter=blob:none --quiet https://github.com/vicente-gonzalez-ruiz/image_IO /tmp/pip-install-6kpeid6_/image-io_cae3918aca1243ebbdb0a57618b716ca
  Resolved https://github.com/vicente-gonzalez-ruiz/image_IO to commit 5dff57f270436b2f3de403e9ae1eeb2ee7be5712
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting opencv-python (from image_IO@ git+https://github.com/vicente-gonzalez-ruiz/image_IO)
  Downloading opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting colored (from image_IO@ git+https://github.com/vicente-gonzalez-ruiz/im

ModuleNotFoundError: No module named 'image_IO'

In [None]:
try:
    from information_theory import information
    #from information_theory import distortion
except:
    !pip install "information_theory @ git+https://github.com/vicente-gonzalez-ruiz/information_theory"
    from information_theory import information
    #from information_theory import distortion

In [None]:
try:
    import full_search
    import display
    import predict
except:
    !pip install "motion_estimation @ git+https://github.com/vicente-gonzalez-ruiz/motion_estimation"
    from motion_estimation import full_search
    from motion_estimation import display
    from motion_estimation import predict

In [None]:
%%bash
# Make sure the directory that holds the cloned repositories exists.
repos_dir="$HOME/repos"
if [ -d "$repos_dir" ]; then
    echo "\"$repos_dir\" exists"
else
    # -p avoids a failure if the directory appears in the meantime.
    mkdir -p "$repos_dir"
    echo "Created $repos_dir"
fi

In [None]:
%%bash
# Update the image_synthesis repository if it is already cloned; clone it otherwise.
repo_dir="$HOME/repos/image_synthesis"
if [ -d "$repo_dir" ]; then
    # Stop if the directory cannot be entered, so git never runs in the wrong place.
    cd "$repo_dir" || exit 1
    echo "$repo_dir ... "
    git pull
else
    cd "$HOME/repos" || exit 1
    git clone https://github.com/vicente-gonzalez-ruiz/image_synthesis.git
fi

## Create a moving-circles sequence
There are two circles, moving horizontally in opposite directions.

In [None]:
%%bash
frames=5
# Generate the two sequences: /tmp/rightNNN.png and /tmp/leftNNN.png.
~/repos/image_synthesis/moving_circle.sh -o /tmp/right -x 32 -y 16 -w 64 -h 32 -f "$frames" -d 10
~/repos/image_synthesis/moving_circle.sh -o /tmp/left -x 32 -y 16 -w 64 -h 32 -f "$frames" -d 10 -a 0 -b -1
# Trace the commands of the composition loop only.
set -x
for (( i = 0; i < frames; i++ ))
do
    ii=$(printf "%03d" "$i")
    # Stack the "right" frame on top of the "left" one (ImageMagick -append).
    convert -append "/tmp/right${ii}.png" "/tmp/left${ii}.png" "/tmp/${ii}.png"
done
# Turn tracing off again (the original repeated "set -x" here).
set +x

## Testing with moving circles (frames 0 and 1, max_abs_motion=2)

In [None]:
# Reference = frame 0, predicted = frame 1. Only the first channel is kept,
# converted to int16.
R, P = (
    RGB_image.read(path).astype(np.int16)[..., 0]
    for path in ("/tmp/000.png", "/tmp/001.png")
)

In [None]:
#home = os.environ["HOME"]
#R = YUV.from_RGB(image_3.read(home + "/MRVC/sequences/moving_circles/", 0).astype(np.int16))[...,0]
#P = YUV.from_RGB(image_3.read(home + "/MRVC/sequences/moving_circles/", 1).astype(np.int16))[...,0]

In [None]:
# Display the reference frame R.
gray_image.show(R, "reference $R$")

In [None]:
# Display the predicted frame P.
gray_image.show(P, "predicted $P$")

The top circle moves to the right and the bottom circle moves to the left. Therefore, if we want to generate the predicted frame (bottom) from the reference one (top), all the top MVs (Motion Vectors) related to the circle should be (x=1, y=0), and all the bottom MVs (-1, 0).

In [None]:
# Search parameters: side of the (square) blocks and maximum displacement searched.
block_side = 32
max_abs_motion = 2
# One motion vector per block of P, searched in R.
MVs = full_search.block_ME(
    P,
    R,
    block_side=block_side,
    max_abs_motion=max_abs_motion,
)

In [None]:
# Shape of the block-level motion field (presumably (block rows, block columns, 2) -- confirm against block_ME).
MVs.shape

In [None]:
# Print the motion field as a grid: one text line per row of blocks,
# each vector followed by a single space.
block_rows = P.shape[0] // block_side
block_cols = P.shape[1] // block_side
for row in range(block_rows):
    line = "".join(f"{MVs[row, col]} " for col in range(block_cols))
    print(line)

In [None]:
# Plot the block-level motion field; the [::1, ::1] slicing keeps every vector (no subsampling).
display.show_vectors(MVs[::1, ::1])

## Interpolate the MVs
The predictor expects a dense motion field.

In [None]:
# Expand the block-level field to one vector per pixel by repeating each
# block's vector (order=0, i.e. no interpolation of the vector values).
_MVs = np.empty((P.shape[0], P.shape[1], 2), dtype=np.float32)
print(_MVs.shape)
# One zoom factor per axis, so the zoomed field always has exactly P's shape
# (the original reused the axis-0 factor for both axes).
zoom_factors = (P.shape[0]/MVs.shape[0], P.shape[1]/MVs.shape[1])
for component in range(2):
    _MVs[..., component] = ndimage.zoom(MVs[..., component], zoom_factors, order=0)

In [None]:
# Plot the dense (per-pixel) motion field; the [::1, ::1] slicing keeps every vector.
display.show_vectors(_MVs[::1, ::1])

## Make a prediction

In [None]:
# Build the prediction of P by warping the reference R with the dense motion field.
hat_P = predict.warp(R, _MVs)

In [None]:
# Show reference, target, prediction and prediction error.
# Raw strings keep the LaTeX backslashes from being read as (invalid) Python
# escape sequences such as "\m" and "\h".
gray_image.show(R, r"reference ${\mathbf R}$")
gray_image.show(P, "predicted $P$")
gray_image.show(hat_P, r"prediction $\hat{P}$")
gray_image.show(P - hat_P, "prediction error")

## Testing with moving circles (frames 0 and 2, max_abs_motion=4)

In [None]:
# Same reference (frame 0); the predicted frame is now frame 2.
# Only the first channel is kept, converted to int16.
R, P = (
    RGB_image.read(path).astype(np.int16)[..., 0]
    for path in ("/tmp/000.png", "/tmp/002.png")
)
#R = YUV.from_RGB(image_3.read("/home/vruiz/MRVC/sequences/moving_circles/", 0).astype(np.int16))[...,0]
#P = YUV.from_RGB(image_3.read("/home/vruiz/MRVC/sequences/moving_circles/", 2).astype(np.int16))[...,0]

In [None]:
# Raw strings keep "\m" (from \mathbf) from being read as an invalid Python escape sequence.
gray_image.show(R, r"reference ${\mathbf R}$")
gray_image.show(P, r"predicted ${\mathbf P}$")

In [None]:
# Search parameters: side of the (square) blocks and maximum displacement searched.
block_side = 16
max_abs_motion = 4
# One motion vector per block of P, searched in R.
MVs = full_search.block_ME(
    P,
    R,
    block_side=block_side,
    max_abs_motion=max_abs_motion,
)

In [None]:
# Plot the block-level motion field; the [::1, ::1] slicing keeps every vector (no subsampling).
display.show_vectors(MVs[::1, ::1])

In [None]:
# Expand the block-level field to one vector per pixel by repeating each
# block's vector (order=0, i.e. no interpolation of the vector values).
_MVs = np.empty((P.shape[0], P.shape[1], 2), dtype=np.float32)
# One zoom factor per axis, so the zoomed field always has exactly P's shape
# (the original reused the axis-0 factor for both axes).
zoom_factors = (P.shape[0]/MVs.shape[0], P.shape[1]/MVs.shape[1])
for component in range(2):
    _MVs[..., component] = ndimage.zoom(MVs[..., component], zoom_factors, order=0)
display.show_vectors(_MVs[::1, ::1])

In [None]:
# Predict P by warping R with the dense motion field, then show the prediction and its error.
hat_P = predict.warp(R, _MVs)
# Raw string: "\h" and "\m" are not valid Python escape sequences.
gray_image.show(hat_P, r"prediction $\hat{\mathbf P}$")
gray_image.show(P - hat_P, "error")

## Even more distance

In [None]:
# Same reference (frame 0); the predicted frame is now frame 4.
# Only the first channel is kept, converted to int16.
R, P = (
    RGB_image.read(path).astype(np.int16)[..., 0]
    for path in ("/tmp/000.png", "/tmp/004.png")
)
#R = YUV.from_RGB(image_3.read("/home/vruiz/MRVC/sequences/moving_circles/", 0).astype(np.int16))[...,0]
#P = YUV.from_RGB(image_3.read("/home/vruiz/MRVC/sequences/moving_circles/", 4).astype(np.int16))[...,0]

In [None]:
# Raw strings keep "\m" (from \mathbf) from being read as an invalid Python escape sequence.
gray_image.show(R, r"reference ${\mathbf R}$")
gray_image.show(P, r"predicted ${\mathbf P}$")

In [None]:
# Search parameters: side of the (square) blocks and maximum displacement searched.
block_side = 16
max_abs_motion = 8
# One motion vector per block of P, searched in R.
MVs = full_search.block_ME(
    P,
    R,
    block_side=block_side,
    max_abs_motion=max_abs_motion,
)

In [None]:
# Plot the block-level motion field; the [::1, ::1] slicing keeps every vector (no subsampling).
display.show_vectors(MVs[::1, ::1])

In [None]:
# Expand the block-level field to one vector per pixel by repeating each
# block's vector (order=0, i.e. no interpolation of the vector values).
_MVs = np.empty((P.shape[0], P.shape[1], 2), dtype=np.float32)
# One zoom factor per axis, so the zoomed field always has exactly P's shape
# (the original reused the axis-0 factor for both axes).
zoom_factors = (P.shape[0]/MVs.shape[0], P.shape[1]/MVs.shape[1])
for component in range(2):
    _MVs[..., component] = ndimage.zoom(MVs[..., component], zoom_factors, order=0)
display.show_vectors(_MVs[::1, ::1])

In [None]:
# Predict P by warping R with the dense motion field, then show the prediction and its error.
hat_P = predict.warp(R, _MVs)
# Raw string: "\h" and "\m" are not valid Python escape sequences.
gray_image.show(hat_P, r"prediction $\hat{\mathbf P}$")
gray_image.show(P-hat_P, "prediction error")

## Now with a real image
A tile of Stockholm.

In [None]:
%%bash
# Fetch the Stockholm sequence and extract 16 frames starting at frame 2,
# written with the /tmp/original_ prefix.
base_url="https://hpca.ual.es/~vruiz/videos/"
video_file="stockholm_1280x768x50x420x578.avi"
frame_prefix="/tmp/original_"
frame_count=16
start_frame=2
~/repos/image_synthesis/extract_frames.sh \
    -u "$base_url" \
    -s "$video_file" \
    -o "$frame_prefix" \
    -n "$frame_count" \
    -f "$start_frame"

### ... but first without using ME
Notice that we work only with a tile of the original image.

In [None]:
# Work on a 256x512 tile (rows 100:356, columns 100:612) of frames 2 and 3.
slice_Y = slice(100,356) # 100:356
slice_X = slice(100,612) # 100:612
sequence = "/tmp/original_"
R = RGB_image.read(sequence + "002.png")[slice_Y, slice_X]
P = RGB_image.read(sequence + "003.png")[slice_Y, slice_X]
# LaTeX titles are raw strings so that "\m" is not read as an (invalid)
# Python escape sequence.
RGB_image.show(R, r"reference ${\mathbf R}$")
entropy = information.entropy(P.flatten())
RGB_image.show(P, r"predicted ${\mathbf P}$" + f" entropy={entropy:1.2f} bits/component")
predicted_entropy = entropy
# Residue without motion compensation: computed in int16, offset by 128 and
# clipped to [0, 255] so it can be shown as an 8-bit image.
P_R = np.clip(P.astype(np.int16) - R + 128, 0, 255)
entropy = information.entropy(P_R.flatten())
RGB_image.show(P_R.astype(np.uint8), r"(No ME) $({\mathbf P} - {\mathbf R})$" + f" entropy={entropy:1.2f} bits/component")

### ... and now using BBME (block-based motion estimation)

In [None]:
# Search parameters: side of the (square) blocks and maximum displacement searched.
block_side = 32
max_abs_motion = 8
# One motion vector per block of P, searched in R.
MVs = full_search.block_ME(
    P,
    R,
    block_side=block_side,
    max_abs_motion=max_abs_motion,
)

In [None]:
# Entropy of the motion-vector components (both components pooled together).
entropy = information.entropy(MVs.flatten())
motion_entropy = entropy
# The LaTeX part of the title is a raw string so that "\m" is not read as an
# (invalid) Python escape sequence.
display.show_vectors(
    MVs[::1, ::1],
    title=r"${\mathbf V}$"
    + f", {block_side}x{block_side} ME"
    + f", entropy={entropy:1.2f} bits/component"
    + f", {MVs.shape[0]}x{MVs.shape[1]} vectors",
)

In [None]:
# Number of block rows in the tile (tile height divided by the block side).
P.shape[0]/block_side

In [None]:
# Number of block columns in the tile (tile width divided by the block side).
P.shape[1]/block_side

In [None]:
# Expand the block-level field to one vector per pixel by repeating each
# block's vector (order=0, i.e. no interpolation of the vector values).
zoom_MVs = np.empty((P.shape[0], P.shape[1], 2), dtype=np.float32)
# Report the array built in this cell (the original printed the stale _MVs
# left over from the moving-circles experiments).
print(zoom_MVs.shape)
# One zoom factor per axis, so the zoomed field always has exactly P's shape
# (the original reused the axis-0 factor for both axes).
zoom_factors = (P.shape[0]/MVs.shape[0], P.shape[1]/MVs.shape[1])
for component in range(2):
    zoom_MVs[..., component] = ndimage.zoom(MVs[..., component], zoom_factors, order=0)

In [None]:
# Plot the dense motion field, keeping one vector out of every 10 along each axis.
display.show_vectors(zoom_MVs[::10, ::10])

In [None]:
# Motion-compensated prediction of P from R.
hat_P = predict.warp(R, zoom_MVs)
# LaTeX titles are raw strings so that "\m" and "\h" are not read as
# (invalid) Python escape sequences.
RGB_image.show(P, r"predicted ${\mathbf P}$" + f" entropy={predicted_entropy:1.2f} bits/component")
RGB_image.show(hat_P, r"$\hat{\mathbf P}$" + f", {block_side}x{block_side} ME")
# Compute the residue exactly like the no-ME residue (int16 arithmetic, +128
# offset, clip to [0, 255]) so that both entropies are comparable.
# NOTE(review): P appears to be uint8 here (the no-ME cell casts it to int16),
# in which case "P - hat_P + 128" would wrap around -- confirm the dtype
# returned by predict.warp.
P_hat_P = np.clip(P.astype(np.int16) - hat_P + 128, 0, 255)
entropy = information.entropy(P_hat_P.flatten())
residue_entropy = entropy
RGB_image.show(P_hat_P.astype(np.uint8), r"${\mathbf P} - \hat{\mathbf P}$" + f", {block_side}x{block_side} ME" + f", entropy={entropy:1.2f} bits/component")

In [None]:
# Summary of the experiment. Every figure is an entropy in bits per component,
# so the same unit label is used on every line.
print("Entropy of the predicted frame:                 ", f"{predicted_entropy:1.3f}", "bits/component")
print("Entropy of the residue frame:                   ", f"{residue_entropy:1.3f}", "bits/component")
print("Entropy reduction in the texture:               ", f"{predicted_entropy - residue_entropy:1.3f}", "bits/component")
print("Entropy of the components of the motion vectors:", f"{motion_entropy:1.3f}", "bits/component")
# Estimated code lengths (in bits) of the residue and of the motion field;
# their sum is expressed per component of the predicted frame.
texture_length = residue_entropy * P.size
motion_length = motion_entropy * MVs.size
total_length = texture_length + motion_length
total_entropy = total_length / P.size
print("Entropy of the texture + motion vectors:        ", f"{total_entropy:1.3f}", "bits/component")

## Evaluate (visually) the ME on other sequences (TODO)