In [16]:
# [Cell 1] - Set up paths
import os

is_colab = True
if is_colab:
  from google.colab import drive
  drive.mount('/content/drive')
  BASE_PATH = "/content/drive/MyDrive"  # This will use the current directory
  submission_path = os.path.join(BASE_PATH,  "submission.csv")
  save_model = os.path.join(BASE_PATH,  "siamese.pth")
else:
  BASE_PATH = "/kaggle/input/cvproject"
  submission_path = "submission.csv"
  save_model = "siamese.pth"

num_epochs = 1
batch_size = 10
num_workers = 1 # workers in dataloader
num_train = 100 # number or None
num_test = 1
accumulation_steps = 1
is_tiny_roma = False
# !unzip "/content/drive/MyDrive/cv-22928-2025-a-project.zip" -d "/content/"

project_folder = "cv-22928-2025-a-project/train"
project_folder = os.path.join(BASE_PATH, project_folder)
src = os.path.join(BASE_PATH, "cv-22928-2025-a-project")

test_csv = os.path.join(BASE_PATH, "cv-22928-2025-a-project/test.csv")


if os.path.exists(project_folder):
    print(f"Successfully found project directory at: {project_folder}")
else:
    print(f"ERROR: Could not find project directory at: {project_folder}")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Please check if the path '{project_folder}' is correct")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Successfully found project directory at: /content/drive/MyDrive/cv-22928-2025-a-project/train


In [None]:
!pip install git+https://github.com/Parskatt/RoMa.git
!pip install tqdm
!pip install torch torchvision pycolmap

Collecting git+https://github.com/Parskatt/RoMa.git
  Cloning https://github.com/Parskatt/RoMa.git to /tmp/pip-req-build-lv_q34o0
  Running command git clone --filter=blob:none --quiet https://github.com/Parskatt/RoMa.git /tmp/pip-req-build-lv_q34o0
  Resolved https://github.com/Parskatt/RoMa.git to commit 64f20c7ee67e7ea5bd1448c3e9468a8c5f2f06b9
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting kornia (from romatch==0.0.1)
  Downloading kornia-0.8.0-py2.py3-none-any.whl.metadata (17 kB)
Collecting loguru (from romatch==0.0.1)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting poselib (from romatch==0.0.1)
  Downloading poselib-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (176 bytes)
Collecting kornia_rs>=0.1.0 (from kornia->romatch==0.0.1)
  Downloading kornia_rs-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->romatch==0.0.1)
  Down

In [None]:


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
import cv2
import numpy as np
from romatch import roma_outdoor
from tqdm import tqdm

device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_str)

In [None]:
from romatch import tiny_roma_v1_outdoor, roma_outdoor
if is_tiny_roma:
  roma_model = tiny_roma_v1_outdoor(device=device)
  num_inputs = 15000
else:
  roma_model = roma_outdoor(device=device)
  num_inputs = 30000

Downloading: "https://github.com/Parskatt/storage/releases/download/roma/tiny_roma_v1_outdoor.pth" to /root/.cache/torch/hub/checkpoints/tiny_roma_v1_outdoor.pth
100%|██████████| 10.9M/10.9M [00:00<00:00, 62.1MB/s]
Downloading: "https://github.com/verlab/accelerated_features/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://github.com/verlab/accelerated_features/raw/main/weights/xfeat.pt" to /root/.cache/torch/hub/checkpoints/xfeat.pt
100%|██████████| 5.96M/5.96M [00:00<00:00, 94.9MB/s]


In [None]:


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
import cv2
import numpy as np
from romatch import roma_outdoor
from tqdm import tqdm
import pycolmap


import torch
import torch.nn as nn
import torch.optim as optim

def normalize_frobenius_norm(matrix):
        """Compute the Frobenius norm of a matrix and return the normalized matrix."""
        norm = torch.norm(matrix, p='fro', dim=(-2, -1), keepdim=True)  # Frobenius norm
        normalized_matrix = matrix / (norm + 1e-8)  # Add epsilon to prevent division by zero
        return normalized_matrix

class SiameseFundamentalMatrixNet(nn.Module):
    def __init__(self):
        super(SiameseFundamentalMatrixNet, self).__init__()

        # Keypoint processing sub-network (shared weights for both kptsA and kptsB)
        self.fc1_kpts = nn.Linear(num_inputs, 32)  # Processing 2D keypoints (x, y)
        self.fc2_kpts = nn.Linear(32, 32)
        self.fc3_kpts = nn.Linear(32, 32)
        self.fc_Fest = nn.Linear(9, 32)  # Flattened F_est (3x3 matrix)

        # Final layers
        self.fc_final = nn.Linear(32*3, 9)  # Predicted 3x3 matrix (flattened)

        self.relu = nn.ReLU()

    def forward_once(self, x):
        """Process one set of keypoints (either A or B)."""
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1_kpts(x))
        x = self.relu(self.fc2_kpts(x))
        x = self.relu(self.fc3_kpts(x))
        return x


    def forward(self, kptsA, kptsB, F_est):

        # Process keypoints A and B through the same shared network
        F_est = normalize_frobenius_norm(F_est)
        outA = self.forward_once(kptsA)
        outB = self.forward_once(kptsB)
        Fest_out = self.relu(self.fc_Fest(F_est.reshape(-1, 9)))  # Flatten F_est (3x3)

        # Concatenate all features
        combined_features = torch.cat((outA, outB, Fest_out), dim=1)

        # Final fully connected layer to get the fundamental matrix
        F_pred = self.fc_final(combined_features).view(-1, 3, 3)  # Output as a 3x3 matrix
        return normalize_frobenius_norm(F_pred)

In [None]:

class ImagePairDataset(Dataset):
    def __init__(self, image_pairs, fundamental_matrices, transform=None):
        self.image_pairs = image_pairs
        self.fundamental_matrices = fundamental_matrices
        self.transform = transform

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        imA_path, imB_path = self.image_pairs[idx]
        F = self.fundamental_matrices[idx]

        warp, certainty = roma_model.match(imA_path, imB_path)
        # Sample matches for estimation
        matches, certainty = roma_model.sample(warp, certainty)
        # Get image dimensions (height, width, channels)
        H_A, W_A, _ = cv2.imread(imA_path).shape
        H_B, W_B, _ = cv2.imread(imB_path).shape

        # Convert to pixel coordinates (RoMa produces matches in [-1,1]x[-1,1])
        kptsA, kptsB = roma_model.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
        if (kptsA is None) or (kptsA.size(0) == 0) or (kptsB is None) or (kptsB.size(0) == 0):
          F_est = None
        else:
          F_est, _ = cv2.findFundamentalMat(
            kptsA.cpu().numpy(),
            kptsB.cpu().numpy(),
            ransacReprojThreshold=0.2,
            method=cv2.USAC_MAGSAC,
            confidence=0.999999,
            maxIters=10000
          )
        F_est = F_est if F_est is not None else np.eye(3, 3)
        F_est = torch.tensor(F_est, dtype=torch.float32).to(device)
        F = torch.tensor(F, dtype=torch.float32)
        certainty = certainty.to(device)

        kptsA = torch.cat((kptsA, certainty.unsqueeze(1)), dim=1)
        kptsB = torch.cat((kptsB, certainty.unsqueeze(1)), dim=1)
        return kptsA.to(device), kptsB.to(device), F_est.to(device), F.to(device)

In [5]:

from collections import namedtuple

def LoadCalibration(filename):
    Gt = namedtuple('Gt', ['K', 'R', 'T'])

    '''Load calibration data (ground truth) from the csv file.'''

    calib_dict = {}
    with open(filename, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        for i, row in enumerate(reader):
            # Skip header.
            if i == 0:
                continue

            camera_id = row[1]
            K = np.array([float(v) for v in row[2].split(' ')]).reshape([3, 3])
            R = np.array([float(v) for v in row[3].split(' ')]).reshape([3, 3])
            T = np.array([float(v) for v in row[4].split(' ')])
            calib_dict[camera_id] = Gt(K=K, R=R, T=T)

    return calib_dict

In [8]:
import glob
import pandas as pd
import csv
import numpy as np

class_folders = glob.glob(project_folder + "/*/")
print(class_folders)

dataframes = []
calib_dict = {}
# Loop through the CSV files and read them into DataFrames
for folder in class_folders:
    calib_dict[folder] = LoadCalibration(f'{folder}/calibration.csv')

    df = pd.read_csv(folder + "/pair_covisibility.csv", index_col=False)
    df["img1_path"] = folder + "images/" + df["im1"] + ".jpg"
    df["img2_path"] = folder + "images/" + df["im1"] + ".jpg"
    df["folder"] = folder
    dataframes.append(df)

combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.shape

['/content/drive/MyDrive/cv-22928-2025-a-project/train/trevi_fountain/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/sacre_coeur/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/notre_dame_front_facade/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/temple_nara_japan/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/taj_mahal/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/sagrada_familia/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/lincoln_memorial_statue/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/colosseum_exterior/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/pantheon_exterior/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/brandenburg_gate/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/british_museum/', '/content/drive/MyDrive/cv-22928-2025-a-project/train/buckingham_palace/']


(84578, 9)

In [17]:
df = combined_df.loc[combined_df.covisibility>0.1, :]
df.shape
df = df.copy()
if num_train is not None:
  df = df.iloc[:num_train, :]

In [19]:
# df["fundamental_matrix"].apply(unflatten_matrix).apply(np.linalg.det)
df

Unnamed: 0.1,Unnamed: 0,pair,covisibility,fundamental_matrix,im1,im2,img1_path,img2_path,folder
0,0,99678699_7739302088-66225128_7739308762,0.936,-8.26594108e-04 9.40690118e-03 7.38901850e+00 ...,99678699_7739302088,66225128_7739308762,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
1,1,97815951_7600091342-89086898_3494993799,0.905,-4.08947831e-01 2.91483633e+00 -2.93479953e+03...,97815951_7600091342,89086898_3494993799,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
2,2,97815951_7600091342-76104794_5487278337,0.933,-1.16973317e-02 -9.23100638e-01 5.66360959e+02...,97815951_7600091342,76104794_5487278337,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
3,3,97815951_7600091342-24304044_5235207507,0.939,-7.47888333e-02 9.28854893e-01 -1.12108320e+03...,97815951_7600091342,24304044_5235207507,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
4,4,97815951_7600091342-18959193_201239784,0.914,-1.01879040e-01 -1.65062367e+00 1.25659100e+03...,97815951_7600091342,18959193_201239784,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
...,...,...,...,...,...,...,...,...,...
95,95,62749472_2337325474-58190554_4740116131,0.911,-5.05052409e-01 -3.70890951e+00 2.01060785e+03...,62749472_2337325474,58190554_4740116131,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
96,96,62346964_2989395069-22907054_512711668,0.911,7.57799877e-02 -7.13031640e-01 1.09767945e+03 ...,62346964_2989395069,22907054_512711668,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
97,97,29250228_5522743340-14792943_5522155907,0.925,-5.68332495e-04 -1.00454102e-02 2.20971360e+01...,29250228_5522743340,14792943_5522155907,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...
98,98,24304044_5235207507-19142233_13568335013,0.907,4.30262933e-03 -1.33245289e-01 3.18282666e+01 ...,24304044_5235207507,19142233_13568335013,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...,/content/drive/MyDrive/cv-22928-2025-a-project...


In [11]:
# [Cell 11] - Helper function for matrix flattening
def flatten_matrix(M, num_digits=8):
    """Convert matrix to string format for submission."""
    return ' '.join([f'{v:.{num_digits}e}' for v in M.flatten()])

def unflatten_matrix(flattened_str):
    """Convert a flattened string back into a 3x3 matrix."""
    # Split the flattened string into a list of values
    values = list(map(float, flattened_str.split()))

    # Ensure the number of values is 9 (for a 3x3 matrix)
    if len(values) != 9:
        raise ValueError("The flattened string must contain exactly 9 values.")

    # Reshape the list of values into a 3x3 matrix
    return np.array(values).reshape(3, 3)

In [None]:
# Example data (replace with real dataset)

image_pairs = [x for x in zip(df["img1_path"], df["img2_path"])] # Image pairs (replace with your dataset)
fundamental_matrices = [unflatten_matrix(x) for x in df["fundamental_matrix"]]# Random fundamental matrices (replace with ground truth)

# Create dataset and dataloader
dataset = ImagePairDataset(image_pairs, fundamental_matrices)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:


# Initialize model, loss function, and optimizer
model = SiameseFundamentalMatrixNet().to(device)
criterion = nn.L1Loss()  # Mean Squared Error loss
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

trainable_params/1e6

0.483337

In [None]:

from tqdm import tqdm

scaler = torch.amp.GradScaler(device)

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for idx, (kptsA, kptsB, F_est, F_gt) in tqdm(enumerate(dataloader)):

        optimizer.zero_grad()
        print(idx)
        with torch.cuda.amp.autocast():

          # Forward pass
          F_pred = model.forward(kptsA, kptsB, F_est)

          # Compute loss
          F_gt = normalize_frobenius_norm(F_gt)
          loss = torch.min(criterion(F_pred, F_gt), criterion(-F_pred, F_gt))
        # Backward pass and optimize
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {epoch_loss / len(dataloader)}")

torch.save(model.state_dict(), save_model)

0it [00:00, ?it/s]

1018 674


  with torch.cuda.amp.autocast():
1it [00:05,  5.93s/it]

0
Epoch 1, Loss: 0.3689790964126587





RuntimeError: File /content/drive/MyDrive/siamese.pth cannot be opened.

In [None]:
del dataset
del dataloader

In [None]:
import pandas as pd

df_test = pd.read_csv(test_csv, index_col=False)
df_test["img1_path"] = df_test.apply(lambda row: os.path.join(src, "test_images", row["batch_id"], f"{row['image_1_id']}.jpg"), axis=1)
df_test["img2_path"] = df_test.apply(lambda row: os.path.join(src, "test_images", row["batch_id"], f"{row['image_2_id']}.jpg"), axis=1)
df_test.shape
if num_test is not None:
  df_test = df_test.iloc[:num_train, :]

In [None]:

class TestDataset(Dataset):
    def __init__(self, image_pairs, sample_id, transform=None):
        self.image_pairs = image_pairs
        self.sample_id = sample_id
        self.transform = transform

    def __len__(self):
        return len(self.image_pairs)

    def __getitem__(self, idx):
        imA_path, imB_path = self.image_pairs[idx]
        sample_id = self.sample_id[idx]

        warp, certainty = roma_model.match(imA_path, imB_path)
        # Sample matches for estimation
        matches, certainty = roma_model.sample(warp, certainty)
        # Get image dimensions (height, width, channels)
        H_A, W_A, _ = cv2.imread(imA_path).shape
        H_B, W_B, _ = cv2.imread(imB_path).shape
        # Convert to pixel coordinates (RoMa produces matches in [-1,1]x[-1,1])
        kptsA, kptsB = roma_model.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)

        if (kptsA is None) or (kptsA.size(0) == 0) or (kptsB is None) or (kptsB.size(0) == 0):
          F_est = None
        else:
          F_est, _ = cv2.findFundamentalMat(
            kptsA.cpu().numpy(),
            kptsB.cpu().numpy(),
            ransacReprojThreshold=0.2,
            method=cv2.USAC_MAGSAC,
            confidence=0.999999,
            maxIters=10000
          )
        F_est = F_est if F_est is not None else np.eye(3, 3)
        F_est = torch.tensor(F_est, dtype=torch.float32).to(device)

        certainty = certainty.to(device)

        kptsA = torch.cat((kptsA, certainty.unsqueeze(1)), dim=1)
        kptsB = torch.cat((kptsB, certainty.unsqueeze(1)), dim=1)
        return kptsA.to(device), kptsB.to(device), F_est.to(device), sample_id

In [None]:
# Example data (replace with real dataset)

image_pairs = [x for x in zip(df_test["img1_path"], df_test["img2_path"])] # Image pairs (replace with your dataset)
sample_id = df_test["sample_id"].values
# Create dataset and dataloader
dataset_test = TestDataset(image_pairs, sample_id)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size)

In [None]:

def test_model_with_dataloader(dataloader_test, model, submission_path):
    """
    Perform model inference and save the results (fundamental matrices) to a CSV file.

    Args:
        dataloader_test (DataLoader): PyTorch DataLoader for test samples.
        model (nn.Module): Trained PyTorch model that estimates the fundamental matrix.
        submission_path (str): Path to save the output CSV file.
    """

    # Open the submission file in write mode
    with open(submission_path, 'w') as f:
        f.write('sample_id,fundamental_matrix\n')  # Write the header

        # Use torch.no_grad() to disable gradient computation during inference
        with torch.no_grad():
            # Loop through the DataLoader batches
            for idx_run, (kptsA, kptsB, F_est, sample_id) in tqdm(enumerate(dataloader_test)):
                print(idx_run)
                # Make sure the data is on the correct device (GPU/CPU)
                kptsA, kptsB, F_est = kptsA.to(device), kptsB.to(device), F_est.to(device)

                with torch.cuda.amp.autocast():

                  # Forward pass through the model to get the predicted fundamental matrix
                  F_pred = model.forward(kptsA, kptsB, F_est)

                # Loop through the batch and save the result for each sample
                for idx, sample_id_curr in enumerate(sample_id):
                    # Flatten the predicted fundamental matrix for output
                    flattened_F = flatten_matrix(F_pred[idx])  # Move to CPU for saving to file
                    f.write(f'{sample_id_curr},{flattened_F}\n')

    print(f"Submission saved to: {submission_path}")


def flatten_matrix(M, num_digits=8):
    """Convert matrix to string format for submission."""
    return ' '.join([f'{v:.{num_digits}e}' for v in M.flatten()])

In [None]:
test_model_with_dataloader(dataloader_test, model, submission_path)

0it [00:00, ?it/s]

761 1016


  with torch.cuda.amp.autocast():  # If you're using automatic mixed precision
1it [00:45, 45.60s/it]

0
-3.27394336e-01 -2.85426706e-01 7.58841693e-01 1.28143430e-01 1.82519495e-01 2.04441808e-02 -1.66736722e-01 3.04461211e-01 -2.54664451e-01
Submission saved to: /content/drive/MyDrive/submission.csv



