In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import torch
import json
import viser.transforms as vt
from internal.dataparsers.matrix_city_dataparser import MatrixCity

#

The up direction of the MatrixCity is +z, while MegaNeRF requires -x.
  
So rotate the cameras by $-\pi/2$ (90 degrees clockwise) about the y-axis.

The next cell builds the rotation matrix.

In [None]:
# 4x4 homogeneous transform: an identity matrix whose upper-left 3x3 block is replaced
# by the rotation of -pi/2 radians about the y-axis produced by viser's SO3 helper.
_y_rotation_3x3 = vt.SO3.from_y_radians(-np.pi / 2).as_matrix()
y_clockwise_rotation_half_pi = torch.eye(4, dtype=torch.double)
y_clockwise_rotation_half_pi[:3, :3] = torch.from_numpy(_y_rotation_3x3)
y_clockwise_rotation_half_pi

In [None]:
# Root of the MatrixCity aerial renders; passed to the dataparser below.
dataset_path = os.path.expanduser("~/data-fast/MatrixCity/small_city-self_render/aerial")
# Directory that receives the converted dataset (becomes `output_path` in the save section).
converted_output_path = os.path.expanduser("~/data-fast/MatrixCity/aerial-meganerf/block_1_and_2")

# dataparser
Load MatrixCity dataset

In [None]:
# Use blocks 1 and 2 for training and their "_test" counterparts for testing.
# NOTE(review): the transforms.json paths are presumably resolved relative to
# `dataset_path`, and the meaning of the 2nd and 3rd instantiate() arguments
# (os.getcwd(), 0) is not visible in this notebook — confirm in MatrixCity.instantiate.
dataparser = MatrixCity(
    train=[
        "block_1/transforms.json",
        "block_2/transforms.json",
    ],
    test=[
        "block_1_test/transforms.json",
        "block_2_test/transforms.json",
    ],
).instantiate(dataset_path, os.getcwd(), 0)
dataparser

In [None]:
# Run the parser; later cells read `train_set`, `test_set` and `point_cloud` from the result.
dataparser_outputs = dataparser.get_outputs()
dataparser_outputs

In [None]:
# Quick look: name of the first training image and the number of training images.
dataparser_outputs.train_set.image_names[0], len(dataparser_outputs.train_set.image_names)

Convert poses to y up, z back

In [None]:
# Camera-to-world poses of the training split: swap the last two dimensions of the
# stored world-to-camera matrices, promote them to float64 and invert.
# NOTE(review): the transpose suggests the dataparser stores these matrices transposed — confirm.
train_c2w_from_dataparser = torch.linalg.inv(
    dataparser_outputs.train_set.cameras.world_to_camera.transpose(1, 2).double()
)
# Negate the camera y and z axes (columns 1 and 2 of the rotation part), in place.
train_c2w_from_dataparser[:, :3, 1:3].neg_()
train_c2w_from_dataparser[0]

In [None]:
# Camera-to-world poses of the test split, derived exactly like the training ones:
# swap the last two dimensions, promote to float64, invert.
test_c2w_from_dataparser = torch.linalg.inv(
    dataparser_outputs.test_set.cameras.world_to_camera.transpose(1, 2).double()
)
# Negate the camera y and z axes (columns 1 and 2 of the rotation part), in place.
test_c2w_from_dataparser[:, :3, 1:3].neg_()
test_c2w_from_dataparser[0]

Rotate the camera poses

In [None]:
# Left-multiply every training pose by the world-space rotation
# (the single 4x4 matrix broadcasts over the batch of poses).
rotated_train_c2ws = torch.matmul(y_clockwise_rotation_half_pi, train_c2w_from_dataparser)
rotated_train_c2ws[0]

In [None]:
# Left-multiply every test pose by the same world-space rotation.
rotated_test_c2ws = torch.matmul(y_clockwise_rotation_half_pi, test_c2w_from_dataparser)
rotated_test_c2ws[0]

# json
This section only validates that the poses loaded from the dataparser are correct.
  
This section can be ignored.

In [None]:
# Load an original MatrixCity pose file to compare against.
# NOTE(review): this reads block_A from ~/data-extra, while the dataparser above reads
# block_1/block_2 from ~/data-fast — confirm both describe the same frames in the same order.
with open(os.path.expanduser("~/data-extra/MatrixCity/small_city/aerial/pose/block_A/transforms_train.json"), "r") as f:
    transforms = json.load(f)
transforms.keys()

In [None]:
# Inspect the first frame record of the loaded JSON.
transforms["frames"][0]

In [None]:
# Stack every frame's "transform_matrix" into one float64 tensor of reference poses.
train_c2ws_gt = torch.tensor(
    [frame["transform_matrix"] for frame in transforms["frames"]],
    dtype=torch.double,
)
train_c2ws_gt.shape, train_c2ws_gt[0]

In [None]:
# Validate the dataparser-derived poses against the JSON reference:
# the first pose is compared with allclose, and the largest absolute
# element-wise difference is taken over all poses.
torch.allclose(train_c2ws_gt[0], train_c2w_from_dataparser[0]), torch.abs(train_c2ws_gt - train_c2w_from_dataparser).max()

In [None]:
# Apply the same rotation to the JSON poses and compare with the rotated dataparser poses.
rotated_from_json = y_clockwise_rotation_half_pi @ train_c2ws_gt
# Report the largest *absolute* deviation, like the un-rotated check does; a signed max
# would hide errors where the JSON-derived value is the smaller of the two.
torch.abs(rotated_from_json - rotated_train_c2ws).max(), torch.allclose(rotated_from_json, rotated_train_c2ws, atol=1e-7)

## calculate some required info

In [None]:
# Camera positions are the translation column of each rotated training pose.
camera_centers = rotated_train_c2ws[..., :3, 3]
# Per-axis bounding box of the camera positions.
camera_centers.min(dim=0).values, camera_centers.max(dim=0).values

Take the middle of the min and max as the origin

In [None]:
# Midpoint of the per-axis bounding box of the training camera centers.
origin = 0.5 * (camera_centers.max(dim=0).values + camera_centers.min(dim=0).values)
origin

Rotate the points

In [None]:
# Rotate the point cloud with the 3x3 part of the same transform. Points are row
# vectors here, hence the multiplication by the transposed rotation on the right.
rotated_point_xyzs = torch.matmul(
    torch.from_numpy(dataparser_outputs.point_cloud.xyz).double(),
    y_clockwise_rotation_half_pi[:3, :3].T,
)
rotated_point_xyzs.shape

In [None]:
# Per-axis min and max of the rotated points; the x component (index 0) is what
# ray_altitude_range is taken from.
rotated_point_xyzs.min(dim=0).values, rotated_point_xyzs.max(dim=0).values

In [None]:
# Scene extent: per-axis size of the rotated point cloud's bounding box.
rotated_point_xyzs.max(dim=0).values - rotated_point_xyzs.min(dim=0).values

In [None]:
# [min, max] of the rotated points along x (column 0).
ray_altitude_range = torch.stack([
    rotated_point_xyzs[:, 0].min(),
    rotated_point_xyzs[:, 0].max(),
])
ray_altitude_range

## move and rescale cameras
Map camera centers to [-1, 1].

In [None]:
# Shift the camera centers so the bounding-box midpoint sits at the origin.
camera_centers_moved = camera_centers - origin.unsqueeze(0)
camera_centers_moved.min(dim=0).values, camera_centers_moved.max(dim=0).values

In [None]:
# Largest recentered coordinate over all cameras and axes, as a candidate scale.
scale = camera_centers_moved.max()
scale

In [None]:
# Override the computed scale with a fixed, round value.
# NOTE(review): 4.5 is hand-picked; the extent check a few cells below verifies that the
# recentered camera centers span at most 2 units per axis with it.
scale = torch.tensor(4.5, dtype=torch.float64)

Update the camera centers of c2w matrix

In [None]:
# Recenter and rescale only the translation column; the rotation part is left untouched.
recentered_and_scaled_train_c2ws = rotated_train_c2ws.clone()
recentered_and_scaled_train_c2ws[:, :3, 3] = (rotated_train_c2ws[:, :3, 3] - origin) / scale

# The test poses use the origin and scale derived from the training cameras.
recentered_and_scaled_test_c2ws = rotated_test_c2ws.clone()
recentered_and_scaled_test_c2ws[:, :3, 3] = (rotated_test_c2ws[:, :3, 3] - origin) / scale

In [None]:
# Per-axis span of the recentered, rescaled training camera centers, and whether
# every axis fits inside a range of width 2.
camera_center_extent = (
    recentered_and_scaled_train_c2ws[:, :3, 3].max(dim=0).values
    - recentered_and_scaled_train_c2ws[:, :3, 3].min(dim=0).values
)
camera_center_extent, (camera_center_extent <= 2.).all()

# save

In [None]:
# Create the output directory. `converted_output_path` is already user-expanded where it
# is defined, so the expanduser call here has no further effect.
output_path = os.path.expanduser(converted_output_path)
os.makedirs(output_path, exist_ok=True)
output_path

In [None]:
# Persist the recentering parameters next to the converted data: the origin that was
# subtracted from the camera centers and the scale they were divided by.
torch.save({
    "origin_drb": origin,
    "pose_scale_factor": scale.item(),
}, os.path.join(output_path, "coordinates.pt"))

In [None]:
def save_image_set(target_image_set, target_c2ws, target_split, idx_offset: int):
    """Write one split as `<output_path>/<target_split>/{rgbs,metadata}`.

    For every image a hard link named `{index:06d}.png` is created in `rgbs` and a
    `{index:06d}.pt` file holding `H`, `W`, the top three rows of the pose (`c2w`,
    float32) and `intrinsics` (fx, fy, cx, cy) is saved in `metadata`.

    Args:
        target_image_set: image set providing `len()`, `image_paths` and `cameras`
            (with `height`, `width`, `fx`, `fy`, `cx`, `cy` indexable per image).
        target_c2ws: per-image camera-to-world matrices, indexed like the image set.
        target_split: name of the split sub-directory, e.g. "train" or "val".
        idx_offset: value added to the image index when naming the output files.

    Raises:
        OSError: if an image cannot be hard-linked into the output directory.
    """
    rgb_dir = os.path.join(output_path, target_split, "rgbs")
    metadata_dir = os.path.join(output_path, target_split, "metadata")

    for directory in (rgb_dir, metadata_dir):
        # Create the directory *before* scanning it: os.scandir raises
        # FileNotFoundError on a missing path, which broke the very first run.
        os.makedirs(directory, exist_ok=True)
        # Collect stale files from a previous run, then remove them
        # (os.link fails if the destination already exists).
        with os.scandir(directory) as entries:
            stale_paths = [
                entry.path for entry in entries
                if not entry.is_dir(follow_symlinks=False)
            ]
        for stale_path in stale_paths:
            os.unlink(stale_path)

    for idx in range(len(target_image_set)):
        name_idx = idx + idx_offset
        os.link(target_image_set.image_paths[idx], os.path.join(rgb_dir, "{:06d}.png".format(name_idx)))
        torch.save({
            'H': target_image_set.cameras.height[idx].int().item(),
            'W': target_image_set.cameras.width[idx].int().item(),
            # Only the top 3x4 part of the pose is stored, as float32.
            'c2w': target_c2ws[idx].to(torch.float)[:3],
            'intrinsics': torch.tensor([
                target_image_set.cameras.fx[idx],
                target_image_set.cameras.fy[idx],
                target_image_set.cameras.cx[idx],
                target_image_set.cameras.cy[idx],
            ]),
        }, os.path.join(metadata_dir, "{:06d}.pt".format(name_idx)))

In [None]:
# Write the training split as "train" and the test split as "val". Validation files are
# numbered after the training ones (offset = number of training images), so file indices
# are unique across both splits.
save_image_set(
    dataparser_outputs.train_set,
    recentered_and_scaled_train_c2ws,
    "train",
    0,
)
save_image_set(
    dataparser_outputs.test_set,
    recentered_and_scaled_test_c2ws,
    "val",
    len(dataparser_outputs.train_set),
)

In [None]:
# Spot-check one saved metadata file. The index 256 is hard-coded, so this cell needs
# more than 256 training images.
torch.load(os.path.join(output_path, "train", "metadata", "{:06d}.pt".format(256)))

In [None]:
# Preview the altitude range before and after recentering/rescaling along x
# (you should use the value of `ray_altitude_range` in the config file).
ray_altitude_range, (ray_altitude_range - origin[0]) / scale

# Preview

In [None]:
from internal.utils.graphics_utils import store_ply

# One preview record per training camera. Positions are multiplied back by `scale`
# so they share units with the origin-shifted (but unscaled) point cloud written below.
# Image size and focal length are fixed preview values, not read from the dataset.
camera_list = [
    {
        "id": image_index,
        "img_name": "{:06d}".format(image_index),
        "width": 1920,
        "height": 1080,
        "position": (c2w[:3, 3].numpy() * scale.item()).tolist(),
        "rotation": c2w[:3, :3].numpy().tolist(),
        "fx": 1600,
        "fy": 1600,
        "color": [255, 0, 0],
    }
    for image_index, c2w in enumerate(recentered_and_scaled_train_c2ws)
]

# Camera records for the viewer.
preview_json_path = os.path.join(output_path, "preview.json")
with open(preview_json_path, "w") as f:
    json.dump(camera_list, f)

# Point cloud shifted by the same origin as the cameras.
preview_ply_path = os.path.join(output_path, "preview.ply")
store_ply(preview_ply_path, (rotated_point_xyzs - origin).numpy(), dataparser_outputs.point_cloud.rgb)

# Displayed as the cell output: the command that opens the preview.
"python utils/show_cameras.py --cameras {} --points {}".format(preview_json_path, preview_ply_path)

# convert to colmap
The conversion above should work.

The conversion to COLMAP is only used to validate the conversion outputs.

The outputs of `colmap_to_mega_nerf.py` and above should be identical.

In [None]:
from internal.utils import colmap

In [None]:
# All COLMAP artifacts (images, database, sparse models) go under <output_path>/colmap.
colmap_output_path = os.path.join(output_path, "colmap")
colmap_output_path

In [None]:
colmap_image_path = os.path.join(colmap_output_path, "images")
os.makedirs(colmap_image_path, exist_ok=True)

# Start from a clean image directory: remove every non-directory entry.
for i in os.scandir(colmap_image_path):
    if i.is_dir():
        continue
    os.unlink(i.path)

# Hard-link the training images first, then the test images, under consecutive
# zero-padded names, remembering the rotated (not recentered/rescaled) pose of each.
idx = 0
colmap_image_name_to_c2w = {}
for split_image_paths, split_c2ws in (
    (dataparser_outputs.train_set.image_paths, rotated_train_c2ws),
    (dataparser_outputs.test_set.image_paths, rotated_test_c2ws),
):
    for image_idx, i in enumerate(split_image_paths):
        colmap_image_name = "{:06d}.png".format(idx)
        colmap_image_name_to_c2w[colmap_image_name] = split_c2ws[image_idx]
        os.link(i, os.path.join(colmap_image_path, colmap_image_name))
        idx += 1

len(colmap_image_name_to_c2w)

In [None]:
# Print (not run) the COLMAP feature-extraction command for the linked images.
# NOTE(review): the following cells query the `images` table of this database, so this
# command presumably has to be run in a shell before continuing — confirm.
colmap_db_path = os.path.join(colmap_output_path, "colmap.db")
print(" \\\n    ".join([
    "colmap",
    "feature_extractor",
    "--database_path=" + colmap_db_path,
    "--image_path=" + colmap_image_path,
    "--ImageReader.camera_model=PINHOLE",
    "--ImageReader.single_camera=1",
]))

In [None]:
import sqlite3
# Open the COLMAP database at `colmap_db_path`. The connection is shared by the helper
# functions below and closed explicitly after the camera parameters are updated.
colmap_db = sqlite3.connect(colmap_db_path)
def select_image(image_name: str):
    """Look up an image by file name in the COLMAP database.

    Returns the `(image_id, camera_id)` row of the `images` table whose `name`
    equals `image_name`, or `None` when no such row exists.
    """
    cursor = colmap_db.cursor()
    try:
        row = cursor.execute(
            "SELECT image_id, camera_id FROM images WHERE name = ?",
            (image_name,),
        ).fetchone()
    finally:
        # Always release the cursor, even if the query fails.
        cursor.close()
    return row

In [None]:
# Extra rotation applied on the right of each c2w before export. The literal is a
# 90-degree rotation about z; the trailing .T stores its transpose (the inverse rotation).
# NOTE(review): presumably compensates for an axis convention expected by
# `colmap_to_mega_nerf.py` — confirm.
c2w_transform = torch.tensor([
    [0, -1, 0, 0],
    [1, 0, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
], dtype=torch.double).T
# Homogeneous axis change that swaps x and y and negates z.
# NOTE(review): the name suggests right-down-forward -> down-right-back — confirm.
RDF_TO_DRB_H = torch.tensor([
    [0, 1, 0, 0],
    [1, 0, 0, 0],
    [0, 0, -1, 0],
    [0, 0, 0, 1],
], dtype=torch.double)

In [None]:
# Build one COLMAP image record (world-to-camera rotation + translation) per linked image.
colmap_images = {}
# RDF_TO_DRB_H is constant, so invert it once instead of once per image.
drb_to_rdf_h = torch.linalg.inv(RDF_TO_DRB_H)
for colmap_image_name, c2w in colmap_image_name_to_c2w.items():
    image_row = select_image(colmap_image_name)
    if image_row is None:
        # select_image returns None for names missing from the database; fail with an
        # actionable message instead of an opaque unpacking TypeError.
        raise RuntimeError(
            "image {!r} not found in the COLMAP database {}; "
            "run the feature_extractor command first".format(colmap_image_name, colmap_db_path)
        )
    image_id, _ = image_row
    # Change the pose's axis convention, then invert to get world-to-camera.
    c2w = drb_to_rdf_h @ c2w @ c2w_transform @ RDF_TO_DRB_H
    w2c = torch.linalg.inv(c2w)

    colmap_images[image_id] = colmap.Image(
        image_id,
        qvec=colmap.rotmat2qvec(w2c[:3, :3].numpy()),
        tvec=w2c[:3, 3].numpy(),
        camera_id=1,
        name=colmap_image_name,
        # No 2D observations yet; points are triangulated by COLMAP afterwards.
        xys=np.array([], dtype=np.float64),
        point3D_ids=np.asarray([], dtype=np.int64),
    )

In [None]:
def array_to_blob(array):
    """Serialize a NumPy array to raw bytes for storage in a SQLite BLOB column.

    Uses `ndarray.tobytes()`; the former `tostring()` alias was deprecated in
    NumPy 1.19 and removed in NumPy 2.0.
    """
    return array.tobytes()
def update_camera_params(camera_id: int, params: np.ndarray):
    """Overwrite the `params` blob of one row in the database's `cameras` table.

    Args:
        camera_id: value of `camera_id` selecting the row to update.
        params: camera parameters; serialized with `array_to_blob` before storing.

    The change is committed immediately.
    """
    cursor = colmap_db.cursor()
    try:
        params_blob = array_to_blob(params)
        cursor.execute(
            "UPDATE cameras SET params = ? WHERE camera_id = ?",
            (params_blob, camera_id),
        )
        colmap_db.commit()
    finally:
        # Always release the cursor, even if the update fails.
        cursor.close()
# Intrinsics of the first training camera, in the order fx, fy, cx, cy.
# NOTE(review): assumes every image shares these intrinsics and that the database's only
# camera has id 1 (the feature extractor is invoked with single_camera=1) — confirm.
colmap_camera_params = np.asarray([
    dataparser_outputs.train_set.cameras.fx[0].item(),
    dataparser_outputs.train_set.cameras.fy[0].item(),
    dataparser_outputs.train_set.cameras.cx[0].item(),
    dataparser_outputs.train_set.cameras.cy[0].item(),
])
# Store these intrinsics in the database row of camera 1.
update_camera_params(1, colmap_camera_params)
# Matching in-memory camera record for the sparse model written below.
colmap_cameras = {1: colmap.Camera(
    id=1,
    model="PINHOLE",
    width=dataparser_outputs.train_set.cameras.width[0].int().item(),
    height=dataparser_outputs.train_set.cameras.height[0].int().item(),
    params=colmap_camera_params,
)}

In [None]:
# Done with the database from Python; close the connection opened above.
colmap_db.close()

In [None]:
# Write a sparse model made of the known image poses, the single camera and an
# empty set of 3D points.
sparse_manually_model_dir = os.path.join(colmap_output_path, "sparse_manually")
os.makedirs(sparse_manually_model_dir, exist_ok=True)
colmap.write_images_binary(colmap_images, os.path.join(sparse_manually_model_dir, "images.bin"))
colmap.write_cameras_binary(colmap_cameras, os.path.join(sparse_manually_model_dir, "cameras.bin"))
colmap.write_points3D_binary({}, os.path.join(sparse_manually_model_dir, "points3D.bin"))

In [None]:
# Print (not run) the COLMAP feature-matching command. It expects the vocabulary tree
# file at the path under ~/.cache/colmap given below.
print(" \\\n    ".join([
    "colmap",
    "vocab_tree_matcher",
    "--database_path=" + colmap_db_path,
    "--VocabTreeMatching.vocab_tree_path=" + os.path.expanduser("~/.cache/colmap/vocab_tree_flickr100K_words256K.bin"),
]))

In [None]:
# Create the output directory and print (not run) the COLMAP triangulation command,
# which takes the manually written model as input and writes to <colmap_output_path>/sparse.
sparse_dir_triangulated = os.path.join(colmap_output_path, "sparse")
os.makedirs(sparse_dir_triangulated, exist_ok=True)
print(" \\\n    ".join([
    "colmap",
    "point_triangulator",
    "--database_path", colmap_db_path,
    "--image_path", colmap_image_path,
    "--input_path", sparse_manually_model_dir,
    "--output_path", sparse_dir_triangulated,
]))