In [1]:
import math

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image


def get_integer_scale(scale, h, w):
    """Snap ``scale`` to the closest factor that scales both ``w`` and ``h`` to integers.

    The factor is quantized to multiples of ``1 / gcd(w, h)``. Since both
    ``w`` and ``h`` are multiples of the gcd, ``w * result`` and
    ``h * result`` are then whole numbers, so the aspect ratio survives
    the resize exactly.

    Args:
        scale: requested scale factor.
        h: image height in pixels (integer).
        w: image width in pixels (integer).

    Returns:
        The adjusted scale factor as a float.

    Raises:
        ValueError: if the adjusted scale collapses to zero or deviates
            from the requested one by more than 0.1.
    """
    gcd = math.gcd(w, h)

    # Number of gcd-sized steps the requested scale corresponds to.
    real_scale_gcd = round(gcd * scale)
    real_scale = real_scale_gcd / gcd

    if real_scale == 0.0 or math.fabs(real_scale - scale) > 0.1:
        # ValueError is a subclass of Exception, so existing broad handlers still work.
        raise ValueError(
            f"scale {scale} cannot be effectively realized for w, h = {w}, {h} in integer domain"
        )

    return real_scale


def split_points(tentative_matches, kps0, kps1):
    """Resolve matches into aligned point arrays and keypoint lists.

    Args:
        tentative_matches: iterable of match objects exposing ``queryIdx``
            (index into ``kps0``) and ``trainIdx`` (index into ``kps1``).
        kps0: keypoints of the first image; each exposes ``.pt``.
        kps1: keypoints of the second image; each exposes ``.pt``.

    Returns:
        ``(src_pts, dst_pts, matched_kps0, matched_kps1)`` where the point
        arrays are float32 of shape ``(N, 2)`` and the keypoint lists hold
        the matched keypoints in match order.
    """
    matched0 = []
    matched1 = []
    for match in tentative_matches:
        matched0.append(kps0[match.queryIdx])
        matched1.append(kps1[match.trainIdx])

    # reshape(-1, 2) keeps a well-formed (0, 2) array when there are no matches
    src_pts = np.asarray([kp.pt for kp in matched0], dtype=np.float32).reshape(-1, 2)
    dst_pts = np.asarray([kp.pt for kp in matched1], dtype=np.float32).reshape(-1, 2)
    return src_pts, dst_pts, matched0, matched1


def get_tentatives(kpts0, desc0, kpts1, desc1, ratio_threshold):
    """Select tentative correspondences between two descriptor sets.

    A match from image 0 to image 1 is kept when it is both
    (a) mutual: the best match of its image-1 descriptor back into image 0
        points to the same image-0 descriptor, and
    (b) distinctive: its distance is below ``ratio_threshold`` times the
        distance of the second-best candidate.

    Args:
        kpts0, desc0: keypoints and descriptors of the first image.
        kpts1, desc1: keypoints and descriptors of the second image.
        ratio_threshold: factor applied to the second-best distance.

    Returns:
        ``(src, dst, kpts0, kpts1, tentative_matches)`` where the first four
        items come from ``split_points`` and the last is the list of kept
        match objects.
    """
    matcher = cv.BFMatcher(crossCheck=False)
    forward_knn = matcher.knnMatch(desc0, desc1, k=2)
    # NOTE(review): indexing `backward` by trainIdx assumes match() returns
    # exactly one entry per desc1 row, in row order - confirm.
    backward = matcher.match(desc1, desc0)

    tentative_matches = [
        best
        for best, second in forward_knn
        if backward[best.trainIdx].trainIdx == best.queryIdx
        and best.distance < ratio_threshold * second.distance
    ]

    src, dst, kpts0, kpts1 = split_points(tentative_matches, kpts0, kpts1)
    return src, dst, kpts0, kpts1, tentative_matches


def get_visible_part_mean_absolute_reprojection_error(img1, img2, H_gt, H):
    """Mean reprojection error of ``H`` against ``H_gt`` over the co-visible area.

    A mask of ones covering image 1 is warped into image 2 with ``H_gt`` and
    warped back with the inverse; pixels that stay positive form the visible
    part. Every pixel coordinate of image 1 is projected with both
    homographies, and the Euclidean distance between the two projections is
    averaged over the visible part.

    Args:
        img1: reference image (only its height and width are used).
        img2: target image (only its height and width are used).
        H_gt: 3x3 ground-truth homography, image 1 -> image 2.
        H: 3x3 estimated homography, image 1 -> image 2.

    Returns:
        The summed masked error divided by the number of visible pixels.
    """
    height, width = img1.shape[:2]
    # OpenCV expects sizes as (width, height)
    size1 = img1.shape[:2][::-1]
    size2 = img2.shape[:2][::-1]

    ones = np.ones((height, width))
    forward = cv.warpPerspective(ones, H_gt, size2)
    visible = cv.warpPerspective(forward, np.linalg.inv(H_gt), size1) > 0

    # (x, y) coordinate of every pixel of image 1, laid out as (N, 1, 2) float32
    grid_x, grid_y = np.meshgrid(np.arange(width), np.arange(height))
    pixels = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 1, 2).astype(np.float32)

    projected_gt = cv.perspectiveTransform(pixels, H_gt.astype(np.float32)).squeeze(1)
    projected_est = cv.perspectiveTransform(pixels, H.astype(np.float32)).squeeze(1)

    distances = np.sqrt(((projected_gt - projected_est) ** 2).sum(axis=1)).reshape(grid_x.shape)
    masked = distances * visible
    return masked.sum() / visible.sum()


def read_imgs(file_paths, show=False):
    """Load image files into numpy arrays.

    Args:
        file_paths: iterable of paths accepted by ``PIL.Image.open``.
        show: when true, display each image in its own matplotlib figure,
            titled with its 1-based position in ``file_paths``.

    Returns:
        List of numpy arrays, one per path, in input order.
    """
    imgs = []
    for i, file in enumerate(file_paths):
        # The context manager closes the file handle deterministically;
        # np.array() decodes the pixels before the handle is released.
        with Image.open(file) as pil_img:
            img = np.array(pil_img)
        imgs.append(img)
        if show:
            plt.figure()
            plt.imshow(img)
            plt.title(i + 1)
            plt.show()
            plt.close()
    return imgs


def rotate(img, sin_a, cos_a, rotation_index):
    """Rotate ``img`` by quarter turns and build the matching homography.

    The homography is the rotation given by ``sin_a`` / ``cos_a`` followed by
    the translation that moves the rotated image's bounding box so that its
    minimum x and y land on 0. The image itself is rotated with ``np.rot90``
    using ``rotation_index`` quarter turns; the caller is responsible for
    passing a sine/cosine pair that corresponds to that number of turns.

    Args:
        img: image array; the first two axes are (height, width).
        sin_a: sine of the rotation angle.
        cos_a: cosine of the rotation angle.
        rotation_index: number of quarter turns handed to ``np.rot90``.

    Returns:
        ``(H_gt, rotated_img)``: the 3x3 homography and a copy of the
        rotated image.
    """
    height, width = img.shape[:2]
    last_x, last_y = width - 1, height - 1

    H_gt = np.array([
        [cos_a, sin_a, 0.],
        [-sin_a, cos_a, 0.],
        [0., 0., 1.],
    ])

    # Homogeneous coordinates of the four corner pixels, one per column
    corners = np.array([
        [0., 0., last_x, last_x],
        [0., last_y, 0., last_y],
        [1., 1., 1., 1.],
    ])
    rotated_corners = H_gt @ corners

    # Shift so that the rotated bounding box starts at the origin
    H_gt[0, 2] = -rotated_corners[0].min()
    H_gt[1, 2] = -rotated_corners[1].min()

    return H_gt, np.rot90(img, rotation_index, [0, 1]).copy()


def scale_img(img, scale):
    """Resize ``img`` with Lanczos resampling and build the matching homography.

    Args:
        img: image as a numpy array; the first two axes are (height, width).
        scale: requested scale factor; it is adjusted by
            ``get_integer_scale`` before use.

    Returns:
        ``(H_gt, scaled_img)``: the 3x3 homography from the original to the
        scaled image, and the scaled image as a numpy array.
    """
    height, width = img.shape[:2]
    requested = scale
    # The scale needs to be (slightly) changed so that both scaled dimensions
    # are whole numbers, i.e. the aspect ratio is kept exactly.
    scale = get_integer_scale(requested, height, width)
    print(f"scale: {requested} => {scale}")

    # This offset maps the point (-0.5, -0.5) onto itself under the scaling.
    shift = 0.5 * (scale - 1)
    H_gt = np.array([
        [scale, 0., shift],
        [0., scale, shift],
        [0., 0., 1.],
    ])

    # PIL takes the target size as (width, height)
    target_size = (round(width * scale), round(height * scale))
    resized = Image.fromarray(img).resize(size=target_size, resample=Image.Resampling.LANCZOS)

    return H_gt, np.array(resized)


def np_show(img, title=None):
    """Display ``img`` in a new matplotlib figure and close it afterwards.

    Args:
        img: image data passed to ``plt.imshow``.
        title: optional figure title; falsy values (``None``, ``""``) leave
            the figure untitled.
    """
    plt.figure()
    plt.imshow(img)
    if title:
        plt.title(title)
    plt.show()
    # Close the figure explicitly after it has been shown
    plt.close()


def Hs_imgs_for_rotation(file, show=False):
    """Create the synthetic rotation experiment from a single image file.

    The image is rotated by one, two and three quarter turns via ``rotate``.

    Args:
        file: path of the image to load.
        show: when true, display the original image via ``np_show``.

    Returns:
        ``(Hs_gt, imgs)``: the three ground-truth homographies, and a list of
        four images with the original first, followed by the rotations.
    """
    # Close the file handle deterministically; np.array() decodes the pixels first.
    with Image.open(file) as pil_img:
        img = np.array(pil_img)

    if show:
        np_show(img, "original")

    # (sin, cos) pairs handed to `rotate` for 1, 2 and 3 quarter turns
    sin_cos_pairs = [(1., 0.), (0., -1.), (-1., 0.)]

    Hs_gt = []
    imgs = [img]
    for quarter_turns, (sin_a, cos_a) in enumerate(sin_cos_pairs, start=1):
        H_gt, img_rotated = rotate(img, sin_a, cos_a, quarter_turns)
        Hs_gt.append(H_gt)
        imgs.append(img_rotated)
    return Hs_gt, imgs


def Hs_imgs_for_scaling(file, scales, crop_h2=False):
    """Create the synthetic rescaling experiment from a single image file.

    Args:
        file: path of the image to load.
        scales: iterable of requested scale factors; each one is adjusted
            inside ``scale_img``.
        crop_h2: when true, drop the last two rows of the image before
            scaling.

    Returns:
        ``(Hs_gt, imgs)``: one ground-truth homography per scale, and a list
        of images with the (possibly cropped) original first, followed by
        the scaled versions.
    """
    # Close the file handle deterministically; np.array() decodes the pixels first.
    with Image.open(file) as pil_img:
        img = np.array(pil_img)
    # this is done so that the img dimensions have a big gcd (i.e. gcd(512 - 2, 765) == 255),
    # and by extension so that the aspect ratio can be exactly kept when scaled (see `get_integer_scale`)
    if crop_h2:
        img = img[:img.shape[0] - 2]

    print("Exact scale adjustments:\n")
    Hs_gt = []
    imgs = [img]
    for scale in scales:
        H_gt, img_scaled = scale_img(img, scale)
        Hs_gt.append(H_gt)
        imgs.append(img_scaled)
    return Hs_gt, imgs


def run_exp(Hs_gt, imgs, e_name, instance_names):
    """Run one homography-estimation experiment and print a result table.

    ``imgs[0]`` is the reference image. For every other image, SIFT features
    are matched against the reference, a homography is estimated with
    RANSAC, and its mean reprojection error against the ground truth is
    printed together with the number of tentative matches and inliers.
    A final line prints the sums over all evaluated images.

    Args:
        Hs_gt: ground-truth homographies; ``Hs_gt[i - 1]`` maps ``imgs[0]``
            to ``imgs[i]``.
        imgs: list of images, reference first.
        e_name: experiment name used in the header line.
        instance_names: row labels; ``instance_names[i - 1]`` labels
            ``imgs[i]``.
    """
    print(f"\nrunning experiment: {e_name}\n")
    print("\terror \t\t\t\t #tentatives \t\t #inliers")

    descriptor = cv.SIFT_create()
    kpts_0, desc_0 = descriptor.detectAndCompute(imgs[0], mask=None)

    sum_reproj_err = 0.0
    sum_tent_count = 0
    sum_in_count = 0

    for other_i in range(1, len(imgs)):
        kpts_other, desc_other = descriptor.detectAndCompute(imgs[other_i], mask=None)

        src_pts, dst_pts, _, _, _ = get_tentatives(kpts_0, desc_0, kpts_other, desc_other, ratio_threshold=0.8)
        # A homography needs at least 4 point correspondences
        if len(src_pts) < 4:
            print(f"WARNING: less than 4 tentatives: {len(src_pts)}")
            continue

        H_est, inlier_mask = cv.findHomography(src_pts, dst_pts,
                                               cv.RANSAC,
                                               maxIters=100000,
                                               ransacReprojThreshold=0.5,
                                               confidence=0.9999)
        # findHomography yields no matrix when estimation fails; skip the
        # instance instead of crashing in the error computation below.
        if H_est is None or inlier_mask is None:
            print(f"WARNING: homography estimation failed for {instance_names[other_i - 1]}")
            continue

        H_gt = Hs_gt[other_i - 1]

        reproj_err = get_visible_part_mean_absolute_reprojection_error(imgs[0], imgs[other_i], H_gt, H_est)
        tent_count = len(src_pts)
        in_count = int(inlier_mask.sum())

        print(f"{instance_names[other_i - 1]}\t{reproj_err:.6f} \t\t\t {tent_count} \t\t\t {in_count}")
        sum_reproj_err += reproj_err
        sum_tent_count += tent_count
        sum_in_count += in_count

    print(f"Sum:\t{sum_reproj_err:.6f} \t\t\t {sum_tent_count} \t\t\t {sum_in_count}")




In [2]:
# see https://www.robots.ox.ac.uk/~vgg/data/affine/

# Ground-truth homographies for the "bark" sequence, one 3x3 matrix per
# non-reference image. `run_exp` compares the homography estimated between
# imgs_bark[0] and imgs_bark[k + 1] against Hs_bark[k].
# NOTE(review): presumably copied from the dataset's homography files - confirm.
Hs_bark = [
    [[0.7022029025774007, 0.4313737491020563, -127.94661199701689],
     [-0.42757325092889575, 0.6997834349758094, 201.26193857481698],
     [4.083733373964227E-6, 1.5076445750988132E-5, 1.0]],

    [[-0.48367041358997964, -0.2472935325077872, 870.2215120216712],
     [0.29085746679198893, -0.45733473891783305, 396.1604918833091],
     [-3.578663704630333E-6, 6.880007548843957E-5, 1.0]],

    [[-0.20381418476462312, 0.3510201271914591, 247.1085214229702],
     [-0.3499531830464912, -0.1975486500576974, 466.54576370699766],
     [-1.5735788289619667E-5, 1.0242951905091244E-5, 1.0]],

    [[0.30558415717792214, 0.12841186681168829, 200.94588793078017],
     [-0.12861248979242065, 0.3067557133397112, 133.77000196887894],
     [2.782320090398499E-6, 5.770764104061954E-6, 1.0]],

    [[-0.23047631546234373, -0.10655686701035443, 583.3200507850402],
     [0.11269946585180685, -0.20718914340861153, 355.2381263740649],
     [-3.580280012615393E-5, 3.2283960511548054E-5, 1.0]],
]

# Stack into a (5, 3, 3) array
Hs_bark = np.array(Hs_bark)
# img1.ppm .. img6.ppm; the first file is the reference image
files_bark = [f"imgs/bark/img{i + 1}.ppm" for i in range(6)]
imgs_bark = read_imgs(files_bark, show=False)

# Ground-truth homographies for the "boat" sequence, one 3x3 matrix per
# non-reference image. `run_exp` compares the homography estimated between
# imgs_boat[0] and imgs_boat[k + 1] against Hs_boat[k].
# NOTE(review): presumably copied from the dataset's homography files - confirm.
Hs_boat = [
    [[8.5828552e-01, 2.1564369e-01, 9.9101418e+00],
     [-2.1158440e-01, 8.5876360e-01, 1.3047838e+02],
     [2.0702435e-06, 1.2886110e-06, 1.0000000e+00]],

    [[5.6887079e-01, 4.6997572e-01, 2.5515642e+01],
     [-4.6783159e-01, 5.6548769e-01, 3.4819925e+02],
     [6.4697420e-06, -1.1704138e-06, 1.0000000e+00]],

    [[1.0016637e-01, 5.2319717e-01, 2.0587932e+02],
     [-5.2345249e-01, 8.7390786e-02, 5.3454522e+02],
     [9.4931475e-06, -9.8296917e-06, 1.0000000e+00]],

    [[4.2310823e-01, -6.0670438e-02, 2.6635003e+02],
     [6.2730152e-02, 4.1652096e-01, 1.7460201e+02],
     [1.5812849e-05, -1.4368783e-05, 1.0000000e+00]],

    [[2.9992872e-01, 2.2821975e-01, 2.2930182e+02],
     [-2.3832758e-01, 2.4564042e-01, 3.6767399e+02],
     [9.9064973e-05, -5.8498673e-05, 1.0000000e+00]]
]
# Stack into a (5, 3, 3) array
Hs_boat = np.array(Hs_boat)
# img1.pgm .. img6.pgm; the first file is the reference image
files_boat = [f"imgs/boat/img{i + 1}.pgm" for i in range(6)]
imgs_boat = read_imgs(files_boat, show=False)

# Synthetic rotation experiment: the first bark image rotated by 1-3 quarter turns
Hs_gt_rot, imgs_rot = Hs_imgs_for_rotation(files_bark[0], show=False)

# Requested scale factors 0.2, 0.3, ..., 0.9
scales = [scale_int / 10 for scale_int in range(2, 10)]

# Synthetic rescaling experiment; crop_h2=True drops the last two image rows
# before scaling (see the comment in `Hs_imgs_for_scaling`)
Hs_gt_sc_lanczos, imgs_sc_lanczos = Hs_imgs_for_scaling(files_bark[0], scales, crop_h2=True)

# Each call prints one result table; the last argument supplies the row labels
run_exp(Hs_gt_rot, imgs_rot, "synthetic rotation by multiples of pi/2", ["90deg", "180deg", "270deg"])
run_exp(Hs_bark, imgs_bark, "bark", [f"img{i}" for i in range(2, 7)])
run_exp(Hs_boat, imgs_boat, "boat", [f"img{i}" for i in range(2, 7)])
run_exp(Hs_gt_sc_lanczos, imgs_sc_lanczos, "synthetic rescaling by lanczos", [f"{s}" for s in scales])


Exact scale adjustments:

scale: 0.2 => 0.2
scale: 0.3 => 0.2980392156862745
scale: 0.4 => 0.4
scale: 0.5 => 0.5019607843137255
scale: 0.6 => 0.6
scale: 0.7 => 0.6980392156862745
scale: 0.8 => 0.8
scale: 0.9 => 0.9019607843137255

running experiment: synthetic rotation by multiples of pi/2

	error 				 #tentatives 		 #inliers
90deg	0.499982 			 3464 			 3452
180deg	0.704858 			 3369 			 3349
270deg	0.499913 			 3438 			 3430
Sum:	1.704754 			 10271 			 10231

running experiment: bark

	error 				 #tentatives 		 #inliers
img2	1.332286 			 654 			 241
img3	2.178040 			 565 			 298
img4	1.339833 			 699 			 620
img5	0.711655 			 490 			 433
img6	1.256951 			 305 			 261
Sum:	6.818765 			 2713 			 1853

running experiment: boat

	error 				 #tentatives 		 #inliers
img2	0.313058 			 2468 			 990
img3	0.285631 			 1838 			 926
img4	1.327328 			 757 			 241
img5	0.459227 			 550 			 140
img6	5.696269 			 267 			 63
Sum:	8.081513 			 5880 			 2360

running experiment: synthetic rescaling by la