# 3D-FRONT dataset analysis

In [None]:
import os

# Root directory of the processed 3D-FRONT data. Set the FRONT3D_DATA_PATH
# environment variable to run the notebook on another machine; the default
# keeps the location this notebook was originally written against.
DATA_PATH = os.environ.get("FRONT3D_DATA_PATH", "/root/desktop/3D-FRONT/3D-FRONT-processed")

# Short dataset ID -> sub-directory (relative to DATA_PATH) holding that variant.
# NOTE(review): "norm" uses "..._full/labels" (slash) while "cam" uses
# "..._full_labels" (underscore) — confirm that both directories really exist.
dir_names = {
    "prep": "bedrooms_without_lamps",
    "vert": "bedrooms_without_lamps_full_labels_vertices",
    "norm": "bedrooms_without_lamps_full/labels",
    "cam": "bedrooms_without_lamps_full_labels",
    "cam_fixed_dist": "bedrooms_without_lamps_full_labels_fixed_dist",
    "raw": "bedrooms_without_lamps_full_raw/raw_256",
    "raw_depth": "bedrooms_without_lamps_full_raw/raw_256_depth",
    "raw_depth_normal": "bedrooms_without_lamps_full_raw/raw_256_depth_normal",
    "raw_depth_normal_trans": "bedrooms_without_lamps_full_raw/raw_256_depth_normal_trans",
    "img": "bedrooms_without_lamps_full_images/images_256",
    "img_depth": "bedrooms_without_lamps_full_images/images_256_depth",
    "img_depth_normal": "bedrooms_without_lamps_full_images/images_256_depth_normal_noflip_vmax5",
    "img_depth_normal_trans": "bedrooms_without_lamps_full_images/images_256_depth_normal_trans",
    "img_fixed_dist": "bedrooms_without_lamps_full_images_fixed_dist/images_256",
    "depth_fixed_dist": "bedrooms_without_lamps_full_images_fixed_dist/depths_256",
}

In [None]:
import os

# Count the directory entries of every dataset variant listed in `dir_names`.
# The dict is filled directly from `dir_names`, so it cannot drift out of sync
# with a hand-written list of keys, and the loop variable no longer shadows
# the builtin `id`.
data_nums = {}
for data_id, dir_name in dir_names.items():
    data_nums[data_id] = len(os.listdir(os.path.join(DATA_PATH, dir_name)))
    print(f"ID: {data_id}    # of data: {data_nums[data_id]}")


In [None]:
EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072"

# Directory of the example scene inside every dataset variant. Built from
# `dir_names` itself (all 15 variants) instead of a partial placeholder dict.
data_ex_paths = {}
for data_id, dir_name in dir_names.items():
    data_path = os.path.join(DATA_PATH, dir_name)
    ex_scene_path = os.path.join(data_path, EX_SCENE_ID)
    data_ex_paths[data_id] = ex_scene_path
    print("[ID]", data_id)
    # A variant without this scene should not abort the listing of the others.
    if os.path.isdir(ex_scene_path):
        print(os.listdir(ex_scene_path))
    else:
        print("(no directory for this scene)")

In [None]:
import numpy as np

# Load and dump the `boxes.npz` label archive of the example scene for the
# three label variants.
data_examples = {}
for data_id in ("vert", "norm", "cam"):
    # NOTE(review): allow_pickle=True unpickles object arrays and can execute
    # arbitrary code — only use it on archives you produced yourself.
    data_examples[data_id] = np.load(
        os.path.join(data_ex_paths[data_id], "boxes.npz"), allow_pickle=True
    )
    print("[ID]", data_id)
    for k in data_examples[data_id].keys():
        print("-KEY:", k)
        print("-VALUE", "\n", data_examples[data_id][k])
    print()

In [None]:
!pip install h5py

In [None]:
import h5py

# Open one raw render (HDF5) of the example scene and list its top-level keys.
idx = 0
hdf_ex_path = os.path.join(
    DATA_PATH,
    dir_names["raw_depth_normal"],
    EX_SCENE_ID,
    f"{idx}.hdf5",
)

with h5py.File(hdf_ex_path, "r") as hdf:
    hdf_keys = list(hdf.keys())
    print("Keys:", hdf_keys)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Build the paths "<idx>_<type>.png" for view `idx` of the example scene.
idx = 0
img_types = ["colors", "normals", "depth"]
img_ex_paths = dict.fromkeys(img_types, 0)
for img_type in img_types:
    img_ex_paths[img_type] = os.path.join(
        DATA_PATH,
        dir_names["img_depth_normal"],
        EX_SCENE_ID,
        f"{str(idx).zfill(4)}_{img_type}.png",
    )

# Load the colour render and split it into its RGB and alpha parts.
img_ex_colors = Image.open(img_ex_paths["colors"])
img_ex_colors_arr = np.array(img_ex_colors)
print("Array shape:", img_ex_colors_arr.shape)
print("Data type:", img_ex_colors_arr.dtype)

rgb_ex_arr = img_ex_colors_arr[:, :, :3]
alpha_ex_arr = img_ex_colors_arr[:, :, 3:]

# Number of alpha entries that are not fully opaque.
print((alpha_ex_arr != 255).sum())

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Check rendered image
idx = 0
img_types = ["colors", "depth"]
# Both image types share the "<idx>_<type>.png" naming in the same directory,
# so a single expression replaces the two identical if/elif branches.
img_ex_paths = {}
for img_type in img_types:
    img_ex_paths[img_type] = os.path.join(
        DATA_PATH, dir_names["img_depth_normal"], EX_SCENE_ID, str(idx).zfill(4) + f"_{img_type}" + ".png"
    )

img_ex_colors = Image.open(img_ex_paths["colors"])
img_ex_colors_arr = np.array(img_ex_colors)

print("Array shape:", img_ex_colors_arr.shape)
print("Data type:", img_ex_colors_arr.dtype)
# Split the colour render into RGB and (if present) alpha channels.
rgb_ex_arr = img_ex_colors_arr[:, :, :3]
alpha_ex_arr = img_ex_colors_arr[:, :, 3:]

# Number of alpha entries that are not fully opaque.
print(np.sum(alpha_ex_arr != 255))

In [None]:
# Show the RGB channels of the colour render loaded in the previous cell.
plt.imshow(rgb_ex_arr)

In [None]:
idx = 2
img_types = ["colors", "depth"]
# Both image types share the "<idx>_<type>.png" naming in the same directory,
# so a single expression replaces the two identical if/elif branches.
img_ex_paths = {}
for img_type in img_types:
    img_ex_paths[img_type] = os.path.join(
        DATA_PATH, dir_names["img_depth_normal"], EX_SCENE_ID, str(idx).zfill(4) + f"_{img_type}" + ".png"
    )

img_ex_colors = Image.open(img_ex_paths["colors"])
img_ex_colors_arr = np.array(img_ex_colors)

print("Array shape:", img_ex_colors_arr.shape)
print("Data type:", img_ex_colors_arr.dtype)
# Split the colour render into RGB and (if present) alpha channels.
rgb_ex_arr = img_ex_colors_arr[:, :, :3]
alpha_ex_arr = img_ex_colors_arr[:, :, 3:]

# Number of alpha entries that are not fully opaque.
print(np.sum(alpha_ex_arr != 255))
plt.imshow(rgb_ex_arr)

In [None]:
# Rescale the alpha channel by 1/255 to a float32 mask and display it.
alpha_mask = alpha_ex_arr.astype(np.float32) / 255.0
plt.imshow(alpha_mask)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Check rendered image
idx = 2
img_types = ["colors", "normals", "depth"]
# Every image type uses the same "<idx>_<type>.png" naming, so all paths are
# built uniformly. Previously "normals" had no branch and silently kept the
# integer placeholder 0 instead of a path.
img_ex_paths = {}
for img_type in img_types:
    img_ex_paths[img_type] = os.path.join(
        DATA_PATH, dir_names["img_depth_normal"], EX_SCENE_ID, str(idx).zfill(4) + f"_{img_type}" + ".png"
    )

# img_ex_normals = Image.open(img_ex_paths["normals"])
# img_ex_normals_arr = np.array(img_ex_normals)
img_ex_depth = Image.open(img_ex_paths["depth"])
img_ex_depth_arr = np.array(img_ex_depth)

# print("[Array shape]", "Normals:", img_ex_normals_arr.shape, "Depth:", img_ex_depth_arr.shape)
# print("[Data type]", img_ex_normals_arr.dtype, img_ex_depth_arr.dtype)

# normal_ex_arr = img_ex_normals_arr[:, :, :3]
# alpha_ex_arr = img_ex_normals_arr[:, :, 3:]
# print("normal image:", normal_ex_arr.shape)

# Keep the first channel as the depth map and check whether the second
# channel carries identical values.
depth_ex_arr = img_ex_depth_arr[:, :, 0]
print(np.all(img_ex_depth_arr[:, :, 0] == img_ex_depth_arr[:, :, 1]))
print("depth image:", depth_ex_arr.shape)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Reload the depth PNG whose path was prepared in `img_ex_paths`.
img_ex_depth = Image.open(img_ex_paths["depth"])
img_ex_depth_arr = np.array(img_ex_depth)

print("[Array shape]", "Depth:", img_ex_depth_arr.shape)
print("[Data type]", img_ex_depth_arr.dtype)

# The first channel is used as the depth map; also report whether the second
# channel is an exact copy of it.
depth_ex_arr = img_ex_depth_arr[:, :, 0]
print((depth_ex_arr == img_ex_depth_arr[:, :, 1]).all())
print("depth image:", depth_ex_arr.shape)

In [None]:
# plt.imshow(normal_ex_arr)

In [None]:
# Inspect the depth map: value range, one sample pixel and a colour-mapped view.
# (Original note: anti-aliased rendering misses thin structures.)
depth_max, depth_min = np.max(depth_ex_arr), np.min(depth_ex_arr)
print(depth_max, depth_min)
print(depth_ex_arr[150, 0])
plt.imshow(depth_ex_arr, cmap="viridis")
plt.show()

In [None]:
# Clamp the depth values to [0, 150] before plotting.
depth_ex_arr_clip = depth_ex_arr.clip(0, 150)
plt.imshow(depth_ex_arr_clip, cmap="viridis")
plt.show()

In [None]:
# Widen the depth map to uint32 and multiply by 256 (towards a 16-bit value range).
depth_ex_arr_scaled = depth_ex_arr.astype(np.uint32) * 256
print(depth_ex_arr_scaled.max())
plt.imshow(depth_ex_arr_scaled, cmap="viridis")
plt.show()

In [None]:
# Min-max normalise the depth map to [0, 1] for display.
depth_lo = np.min(depth_ex_arr)
depth_hi = np.max(depth_ex_arr)
depth_ex_arr_norm = (depth_ex_arr - depth_lo) / (depth_hi - depth_lo)
plt.imshow(depth_ex_arr_norm, cmap="viridis")
plt.show()

In [None]:
# Log-compress the depth map, then min-max normalise it for display.
# The depth map is one channel of a multi-channel 8-bit PNG. NumPy evaluates
# log1p on a uint8 array in float16 (about three significant digits), so the
# array is promoted to float64 first to keep the precision.
depth_ex_arr_log = np.log1p(depth_ex_arr.astype(np.float64))
depth_ex_arr_norm = (depth_ex_arr_log - np.min(depth_ex_arr_log)) / (np.max(depth_ex_arr_log) - np.min(depth_ex_arr_log))
plt.imshow(depth_ex_arr_norm, cmap="viridis")
plt.show()

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Paths "<idx>_<type>.png" of view `idx` in the "img_depth_normal_trans" variant.
idx = 0
img_types = ["colors", "normals", "depth"]
img_ex_paths = dict.fromkeys(img_types, 0)
for img_type in img_types:
    img_ex_paths[img_type] = os.path.join(
        DATA_PATH,
        dir_names["img_depth_normal_trans"],
        EX_SCENE_ID,
        f"{str(idx).zfill(4)}_{img_type}.png",
    )

# Load the colour render and split it into its RGB and alpha parts.
img_ex_colors = Image.open(img_ex_paths["colors"])
img_ex_colors_arr = np.array(img_ex_colors)
print("Array shape:", img_ex_colors_arr.shape)
print("Data type:", img_ex_colors_arr.dtype)

rgb_ex_arr = img_ex_colors_arr[:, :, :3]
alpha_ex_arr = img_ex_colors_arr[:, :, 3:]

# Number of alpha entries that are not fully opaque.
print((alpha_ex_arr != 255).sum())

In [None]:
# Rescale the alpha channel by 1/255 to a float32 mask and display it.
alpha_mask = alpha_ex_arr.astype(np.float32) / 255.0
plt.imshow(alpha_mask)

In [None]:
# Check depth
import h5py

# Open one raw depth render (HDF5) of the example scene and list its top-level keys.
idx = 0
hdf_ex_path = os.path.join(
    DATA_PATH,
    dir_names["raw_depth"],
    EX_SCENE_ID,
    f"{idx}.hdf5",
)

with h5py.File(hdf_ex_path, "r") as hdf:
    hdf_keys = list(hdf.keys())
    print("Keys:", hdf_keys)

from PIL import Image
import matplotlib.pyplot as plt

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072" # "bbed00a6-d2e0-4f55-a5cc-09db9e3dd6cd_SecondBedroom-45688"

# Load view `idx` from the "img_fixed_dist" variant.
idx = 1
img_ex_path = os.path.join(
    DATA_PATH,
    dir_names["img_fixed_dist"],
    EX_SCENE_ID,
    f"{str(idx).zfill(4)}.png",
)

img_ex = Image.open(img_ex_path)
img_ex_arr = np.array(img_ex)
print("Array shape:", img_ex_arr.shape)
print("Data type:", img_ex_arr.dtype)

# Split into RGB and alpha parts.
rgb_ex_arr = img_ex_arr[:, :, :3]
alpha_ex_arr = img_ex_arr[:, :, 3:]

# Number of alpha entries that are not fully opaque.
print((alpha_ex_arr != 255).sum())

In [None]:
# Show the RGB channels of the image loaded in the previous cell.
plt.imshow(rgb_ex_arr)

In [None]:
# Rescale the alpha channel by 1/255 to a float32 mask, print the value of the
# top-left pixel, and display the mask.
alpha_mask = alpha_ex_arr.astype(np.float32) / 255.0
print(alpha_mask[0][0])
plt.imshow(alpha_mask)

In [None]:
import os

EX_SCENE_ID = "00110bde-f580-40be-b8bb-88715b338a2a_Bedroom-43072"

# Directories of the example scene used by the view-data loader.
render_path = os.path.join(DATA_PATH, dir_names["img_depth_normal"], EX_SCENE_ID)
# Same directory as `render_path`: both are built from dir_names["img_depth_normal"].
depth_path = os.path.join(DATA_PATH, dir_names["img_depth_normal"], EX_SCENE_ID)
camera_path = os.path.join(DATA_PATH, dir_names["cam"], EX_SCENE_ID)

# Print the contents of each directory under its own heading.
for heading, scene_dir in (
    ("[Render path]", render_path),
    ("[Depth path]", depth_path),
    ("[Camera path]", camera_path),
):
    print(heading)
    for filename in os.listdir(scene_dir):
        print(filename)

In [None]:
import numpy as np

# Load the camera/label archive of the example scene and list the arrays it contains.
camera_npz_path = os.path.join(camera_path, "boxes.npz")
camera = np.load(camera_npz_path, allow_pickle=True)
for k in camera.files:
    print(k)

In [None]:
# Print the "floor_plan_centroid" entry stored in the camera archive.
print(camera["floor_plan_centroid"])

In [None]:
# Viewing direction of every camera: from the camera position to its target.
forward_vec = camera["target_coords"] - camera["camera_coords"]

for heading, values in (
    ("[Camera coords]", camera["camera_coords"]),
    ("[Target coords]", camera["target_coords"]),
    ("[Forward vector]", forward_vec),
):
    print(heading)
    print(values)

In [None]:
# !pip install mathutils
# !pip install https://github.com/majimboo/py-mathutils/archive/2.78a.zip

In [None]:
from typing import Union, Optional

def rotation_from_forward_vec(forward_vector: Union[np.ndarray, list], up_axis: str = 'Y',
                              inplane_rot: Optional[float] = None) -> np.ndarray:
    """ Returns camera rotation matrices for the given forward vector(s) and up axis using NumPy.

    The columns of each matrix are (right, up, -forward), i.e. the camera looks
    along its local -Z axis. Every returned matrix is a proper rotation
    (orthonormal, determinant +1).

    :param forward_vector: Direction(s) the camera should look at, shape (3,) or (N, 3).
                           Need not be normalised; a zero-length vector yields NaNs.
    :param up_axis: The world up axis, one of 'X', 'Y' or 'Z' (case-insensitive), usually Y.
    :param inplane_rot: The in-plane rotation in radians. If None is given, the in-plane rotation
                        is determined only based on the up vector.
    :return: Rotation matrices of shape (3, 3) or (N, 3, 3), matching the input.
    :raises ValueError: If `up_axis` is not 'X', 'Y' or 'Z'.
    """
    world_axes = {
        'X': (1.0, 0.0, 0.0),
        'Y': (0.0, 1.0, 0.0),
        'Z': (0.0, 0.0, 1.0),
    }
    axis_key = up_axis.upper()
    if axis_key not in world_axes:
        raise ValueError("Invalid up_axis. Choose from 'X', 'Y', or 'Z'.")
    up_vec = np.array(world_axes[axis_key])

    # Normalise along the last axis so (3,) and (N, 3) inputs are both handled.
    forward_vector = np.array(forward_vector, dtype=np.float64)
    forward_vector_norm = forward_vector / np.linalg.norm(forward_vector, axis=-1, keepdims=True)

    # right = forward x up gives a right-handed (right, up, -forward) frame.
    right_vec = np.cross(forward_vector_norm, up_vec)
    right_len = np.linalg.norm(right_vec, axis=-1, keepdims=True)

    # A forward vector (anti)parallel to the up axis has no unique right
    # vector; fall back to another world axis for those rows only.
    degenerate = right_len < 1e-8
    if np.any(degenerate):
        fallback_up = np.array(world_axes['Y' if axis_key == 'X' else 'X'])
        fallback_right = np.cross(forward_vector_norm, fallback_up)
        right_vec = np.where(degenerate, fallback_right, right_vec)
        right_len = np.linalg.norm(right_vec, axis=-1, keepdims=True)

    # Normalise every row by its own length (not by the norm of the whole batch).
    right_vec = right_vec / right_len

    # Recompute the true up vector (orthogonal to forward and right).
    up_vec = np.cross(right_vec, forward_vector_norm)
    up_vec = up_vec / np.linalg.norm(up_vec, axis=-1, keepdims=True)

    # Construct the rotation matrix (columns represent right, up, -forward).
    rotation_matrix = np.stack((right_vec, up_vec, -forward_vector_norm), axis=-1)

    # Apply the in-plane rotation (about the camera's local Z axis) if specified.
    if inplane_rot is not None:
        inplane_rotation = np.array([
            [np.cos(inplane_rot), -np.sin(inplane_rot), 0],
            [np.sin(inplane_rot),  np.cos(inplane_rot), 0],
            [0,                   0,                   1]
        ])
        rotation_matrix = rotation_matrix @ inplane_rotation

    return rotation_matrix


# Sanity check: the third column of every rotation matrix should equal the
# negated, normalised camera-to-target direction printed first.
forward_vec = camera["target_coords"] - camera["camera_coords"]
forward_len = np.linalg.norm(forward_vec, axis=1, keepdims=True)
print("[Before] foward_vec:", -forward_vec / forward_len)

rotation_matrix = rotation_from_forward_vec(forward_vec)
forward_vec = rotation_matrix[:, :, 2]
print("[After] foward_vec:", forward_vec)
# print(rotation_matrix[0])

In [None]:
import itertools
import json
import zipfile
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Tuple, BinaryIO, Union, Optional

import numpy as np
from PIL import Image

# # From ATISS CameraUtility.py
# def rotation_from_forward_vec(forward, up=np.array([0, 1, 0])) -> np.ndarray:
#     # Normalize the forward vector
#     forward = forward / np.linalg.norm(forward)

#     # Ensure the up vector is not collinear with the forward vector
#     if np.allclose(forward, up) or np.allclose(forward, -up):
#         up = np.array([1, 0, 0]) # Use an alternative default up vector

#     # Compute the right vector
#     right = np.cross(forward, up)
#     right = right / np.linalg.norm(right)

#     # Recompute the up vector to ensure orthogonality
#     up = np.cross(right, forward)

#     return right, up, forward
#     # # Assemble the rotation matrix
#     # rotation_matrix = np.array([right, up, forward])
#     # return rotation_matrix.T

def rotation_from_forward_vec(forward_vec: Union[np.ndarray, list], up_axis: str = 'Y',
                              inplane_rot: Optional[float] = None) -> np.ndarray:
    """ Returns camera rotation matrices for the given forward vector(s) and up axis using NumPy.

    The columns of each matrix are (right, up, -forward), i.e. the camera looks
    along its local -Z axis. Every returned matrix is a proper rotation
    (orthonormal, determinant +1).

    :param forward_vec: Direction(s) the camera should look at, shape (3,) or (N, 3).
                        Need not be normalised; a zero-length vector yields NaNs.
    :param up_axis: The world up axis, one of 'X', 'Y' or 'Z' (case-insensitive), usually Y.
    :param inplane_rot: The in-plane rotation in radians. If None is given, the in-plane rotation
                        is determined only based on the up vector.
    :return: Rotation matrices of shape (3, 3) or (N, 3, 3), matching the input.
    :raises ValueError: If `up_axis` is not 'X', 'Y' or 'Z'.
    """
    world_axes = {
        'X': (1.0, 0.0, 0.0),
        'Y': (0.0, 1.0, 0.0),
        'Z': (0.0, 0.0, 1.0),
    }
    axis_key = up_axis.upper()
    if axis_key not in world_axes:
        raise ValueError("Invalid up_axis. Choose from 'X', 'Y', or 'Z'.")
    up_vec = np.array(world_axes[axis_key])

    # Normalise along the last axis so (3,) and (N, 3) inputs are both handled.
    forward_vector = np.array(forward_vec, dtype=np.float64)
    forward_vector_norm = forward_vector / np.linalg.norm(forward_vector, axis=-1, keepdims=True)

    # right = forward x up gives a right-handed (right, up, -forward) frame.
    right_vec = np.cross(forward_vector_norm, up_vec)
    right_len = np.linalg.norm(right_vec, axis=-1, keepdims=True)

    # A forward vector (anti)parallel to the up axis has no unique right
    # vector; fall back to another world axis for those rows only.
    degenerate = right_len < 1e-8
    if np.any(degenerate):
        fallback_up = np.array(world_axes['Y' if axis_key == 'X' else 'X'])
        fallback_right = np.cross(forward_vector_norm, fallback_up)
        right_vec = np.where(degenerate, fallback_right, right_vec)
        right_len = np.linalg.norm(right_vec, axis=-1, keepdims=True)
    right_vec = right_vec / right_len

    # Recompute the true up vector (orthogonal to forward and right).
    up_vec = np.cross(right_vec, forward_vector_norm)
    up_vec = up_vec / np.linalg.norm(up_vec, axis=-1, keepdims=True)

    # Construct the rotation matrix (columns represent right, up, -forward).
    rotation_matrix = np.stack((right_vec, up_vec, -forward_vector_norm), axis=-1)

    # Apply the in-plane rotation (about the camera's local Z axis) if specified.
    if inplane_rot is not None:
        inplane_rotation = np.array([
            [np.cos(inplane_rot), -np.sin(inplane_rot), 0],
            [np.sin(inplane_rot),  np.cos(inplane_rot), 0],
            [0,                   0,                   1]
        ])
        rotation_matrix = rotation_matrix @ inplane_rotation

    return rotation_matrix

def build_transformation_matrix(camera_coords, rotation_matrix):
    """
    Assemble a 4x4 homogeneous transform from a rotation and a translation.

    :param camera_coords: 3-element translation (placed in the last column).
    :param rotation_matrix: 3x3 rotation (placed in the upper-left block).
    :return: float64 array of shape (4, 4) whose last row is [0, 0, 0, 1].
    """
    translation = np.asarray(camera_coords, dtype=np.float64)
    rotation = np.asarray(rotation_matrix, dtype=np.float64)

    assert translation.shape == (3,), "camera_coords must be a 3-element vector"
    assert rotation.shape == (3, 3), "rotation_matrix must be a 3x3 matrix"

    upper_rows = np.hstack((rotation, translation.reshape(3, 1)))
    last_row = np.array([[0.0, 0.0, 0.0, 1.0]])
    return np.vstack((upper_rows, last_row))

def local_to_global(local_points, transformation_matrices):
    """
    Map points from a local frame to the global frame with homogeneous 4x4 transforms.

    Accepted combinations:
    - points (3,)      with a (4, 4) matrix
    - points (N, 3)    with a (4, 4) matrix
    - points (B, N, 3) with (B, 4, 4) matrices (one matrix per batch entry)

    :param local_points: Local coordinates (shape: (3,), (N, 3), or (B, N, 3)).
    :param transformation_matrices: Transformation matrices (shape: (4, 4) or (B, 4, 4)).
    :return: Global coordinates in the same shape as `local_points`.
    :raises ValueError: If the point shape is unsupported or does not match the matrix shape.
    """
    points = np.asarray(local_points, dtype=np.float64)
    transforms = np.asarray(transformation_matrices, dtype=np.float64)

    def _to_homogeneous(xyz):
        # Append a trailing coordinate of 1 to every point.
        ones = np.ones(xyz.shape[:-1] + (1,))
        return np.concatenate((xyz, ones), axis=-1)

    is_single = points.ndim == 1 and points.shape == (3,)
    is_cloud = points.ndim == 2 and points.shape[1] == 3
    is_batch = points.ndim == 3 and points.shape[2] == 3

    if is_single:
        if transforms.shape != (4, 4):
            raise ValueError("For input shape (3,), transformation matrix must be (4, 4).")
        transformed = transforms @ _to_homogeneous(points)  # Shape: (4,)
        return transformed[:3]

    if is_cloud:
        if transforms.shape != (4, 4):
            raise ValueError("For input shape (N, 3), transformation matrix must be (4, 4).")
        transformed = _to_homogeneous(points) @ transforms.T  # Shape: (N, 4)
        return transformed[:, :3]

    if is_batch:
        if transforms.ndim != 3 or transforms.shape[1:] != (4, 4):
            raise ValueError("For input shape (B, N, 3), transformation matrices must be (B, 4, 4).")
        # Apply matrix b to every point k of batch entry b.
        transformed = np.einsum('bij,bkj->bki', transforms, _to_homogeneous(points))  # Shape: (B, N, 4)
        return transformed[:, :, :3]

    raise ValueError("Invalid input shape. Expected (3,), (N, 3), or (B, N, 3).")

@dataclass
class Camera(ABC):
    """
    Abstract description of how a camera maps image pixels to rays in space.

    Concrete subclasses provide the pixel grid, the per-pixel rays and the
    derived cameras (cropped, resized, rescaled scene).
    """

    @abstractmethod
    def image_coords(self) -> np.ndarray:
        """
        Enumerate the pixel coordinates of the image.

        :return: ([self.height, self.width, 2]).reshape(self.height * self.width, 2) image coordinates
        """

    @abstractmethod
    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        For every (x, y) coordinate in a rendered image, compute the ray of the
        corresponding pixel.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 2 x 3] array of [2 x 3] (origin, direction) tuples.
                 The direction should always be unit length.
        """

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        For every (x, y) coordinate in a rendered image, get the direction that
        corresponds to "depth" in an RGBD rendering.

        This base implementation always raises NotImplementedError; subclasses
        that support depth override it. Implementations may also raise if there
        is no "D" channel in the corresponding ViewData.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 3] array of normalized depth directions.
        :raises NotImplementedError: always, unless overridden.
        """
        # Mark the argument as intentionally unused.
        _ = coords
        raise NotImplementedError

    @abstractmethod
    def center_crop(self) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with a center crop to a square of the smaller dimension.
        """

    @abstractmethod
    def resize_image(self, width: int, height: int) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with resized image dimensions.

        :param width: new image width in pixels.
        :param height: new image height in pixels.
        """

    @abstractmethod
    def scale_scene(self, factor: float) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the scene rescaled by the given factor.

        :param factor: scale factor applied to the scene.
        """


@dataclass
class ProjectiveCamera(Camera):
    """
    Pinhole-camera implementation of Camera.

    Rays leave `origin` along `z`; `x` and `y` span the image plane along the
    positive horizontal and vertical axes in image space. `x_fov` / `y_fov`
    are halved and passed to `np.tan`, so they are interpreted as radians.
    """

    origin: np.ndarray
    x: np.ndarray
    y: np.ndarray
    z: np.ndarray
    width: int
    height: int
    x_fov: float
    y_fov: float

    def image_coords(self) -> np.ndarray:
        """Return a [height * width, 2] float32 array of (column, row) pixel coordinates, row-major."""
        flat_index = np.arange(self.width * self.height)
        columns = flat_index % self.width
        rows = flat_index // self.width
        return np.stack([columns, rows], axis=1).astype(np.float32)

    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """Return an [N x 2 x 3] array of (origin, unit direction) pairs for the given pixel coordinates."""
        resolution = np.array([self.width, self.height], dtype=np.float32)
        half_fov_tan = np.tan(np.array([self.x_fov, self.y_fov]) / 2)

        # Map pixel coordinates to [-1, 1], then scale by the tangent of the half-FOV.
        fracs = (coords / (resolution - 1)) * 2 - 1
        fracs = fracs * half_fov_tan
        horizontal, vertical = fracs[:, :1], fracs[:, 1:]

        directions = self.z + self.x * horizontal + self.y * vertical
        lengths = np.linalg.norm(directions, axis=-1, keepdims=True)
        directions = directions / lengths

        origins = np.broadcast_to(self.origin, directions.shape)
        return np.stack([origins, directions], axis=1)

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """Return the unit `z` axis repeated once per coordinate, shape [N x 3]."""
        unit_z = self.z / np.linalg.norm(self.z)
        return np.tile(unit_z[None], [len(coords), 1])

    def resize_image(self, width: int, height: int) -> "ProjectiveCamera":
        """
        Creates a new camera for the resized view assuming the aspect ratio does not change.
        """
        assert width * self.height == height * self.width, "The aspect ratio should not change."
        return self._derive(width=width, height=height)

    def center_crop(self) -> "ProjectiveCamera":
        """
        Creates a new camera for the center-cropped view
        """
        side = min(self.width, self.height)
        narrow_fov = min(self.x_fov, self.y_fov)
        return self._derive(width=side, height=side, x_fov=narrow_fov, y_fov=narrow_fov)

    def scale_scene(self, factor: float) -> "ProjectiveCamera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the camera frame rescaled by the given factor.
        """
        return self._derive(origin=self.origin * factor)

    def _derive(self, **overrides) -> "ProjectiveCamera":
        """Build a new ProjectiveCamera from this one's fields with `overrides` applied."""
        fields = dict(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=self.width,
            height=self.height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )
        fields.update(overrides)
        return ProjectiveCamera(**fields)


class ViewData(ABC):
    """
    A collection of rendered camera views of a scene or object.

    This is a generalization of a NeRF dataset, since NeRF datasets only encode
    RGB or RGBA data, whereas this dataset supports arbitrary channels.
    Subclasses report how many views exist, which channels they offer, and
    load a single view together with its camera.
    """

    @property
    @abstractmethod
    def num_views(self) -> int:
        """
        The number of rendered views.
        """

    @property
    @abstractmethod
    def channel_names(self) -> List[str]:
        """
        Get all of the supported channels available for the views.

        This can be arbitrary, but there are some standard names:
        "R", "G", "B", "A" (alpha), and "D" (depth).
        """

    @abstractmethod
    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Load the given channels from the view at the given index.

        :param index: index of the view to load.
        :param channels: names of the channels to load, in the desired output order.
        :return: a tuple (camera_view, data), where data is a float array of
                 shape [height x width x num_channels].
        """

class Front3DBlenderViewData(ViewData):
    """
    ViewData backed by rendered 3D-FRONT scene directories.

    Colour renders are read from `<render_path>/<idx>_colors.png`, depth maps
    from `<depth_path>/<idx>_depth.png` (idx zero-padded to four digits), and
    the camera poses from `<camera_path>/boxes.npz` (keys "camera_coords",
    "target_coords" and "floor_plan_centroid").
    """

    # Field of view in degrees, used for both axes.
    # NOTE(review): 70 is taken over from the original constants — confirm it
    # matches the FOV the images were rendered with.
    fov_degrees: float = 70.0

    def __init__(self, render_path, depth_path, camera_path, fov_degrees: float = 70.0):
        """
        :param render_path: directory containing the `<idx>_colors.png` renders.
        :param depth_path: directory containing the `<idx>_depth.png` depth maps.
        :param camera_path: directory containing `boxes.npz` with the camera poses.
        :param fov_degrees: horizontal and vertical field of view in degrees.
        """
        self.render_path = render_path
        self.depth_path = depth_path
        self.camera_path = camera_path
        self.fov_degrees = fov_degrees
        # NOTE(review): allow_pickle=True can execute arbitrary code while
        # unpickling — only load archives from a trusted source.
        # The context manager closes the archive once the poses are extracted.
        with np.load(os.path.join(camera_path, "boxes.npz"), allow_pickle=True) as camera:
            self.build_cam_info(camera)
        self.channels = list("RGBAD")
        assert set("RGBA").issubset(
            set(self.channels)
        ), "The blender output should at least have RGBA images."

    @property
    def num_views(self) -> int:
        """Number of camera poses found in `boxes.npz`."""
        return len(self.infos)

    @property
    def channel_names(self) -> List[str]:
        """Supported channel names ("R", "G", "B", "A", "D")."""
        return list(self.channels)

    def build_cam_info(self, camera):
        """
        Fill `self.infos` with one camera description per pose in `camera`.

        :param camera: mapping with "camera_coords", "target_coords" and
                       "floor_plan_centroid" (e.g. the loaded `boxes.npz`).
        """
        camera_coords = camera["camera_coords"]
        target_coords = camera["target_coords"]
        floor_plan_centroid = camera["floor_plan_centroid"]
        print("[Before]", "camera_coords", camera_coords[0], "target_coords", target_coords[0])
        # Shift both positions by the floor-plan centroid.
        camera_coords = camera_coords + floor_plan_centroid
        target_coords = target_coords + floor_plan_centroid
        print("Floor Centroid:", floor_plan_centroid)
        print("[After]", "camera_coords", camera_coords[0], "target_coords", target_coords[0])

        rotation_matrix = rotation_from_forward_vec(target_coords - camera_coords)
        print(rotation_matrix.shape)

        # Columns of the rotation matrix are (right, up, -forward).
        right_vec = rotation_matrix[..., 0]
        up_vec = rotation_matrix[..., 1]
        backward_vec = rotation_matrix[..., 2]

        # ProjectiveCamera takes np.tan(fov / 2), i.e. it expects radians.
        fov = float(np.deg2rad(self.fov_degrees))

        self.infos = []
        # NOTE: Should we use [0, 0, 0] as origin?
        for i in range(len(rotation_matrix)):
            self.infos.append(
                {
                    "origin": camera_coords[i],  # Camera origin
                    "x": right_vec[i],  # image x axis: right
                    # Image rows grow downwards, so the image y axis is "down".
                    "y": -up_vec[i],
                    # Rays shoot along z, which must be the viewing direction;
                    # column 2 of the rotation matrix is its negation.
                    "z": -backward_vec[i],
                    "x_fov": fov,
                    "y_fov": fov,
                }
            )

    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Load the requested channels of one view.

        :param index: view index (used zero-padded to four digits in the file names).
        :param channels: channel names to load, in output order.
        :return: (camera, data) with data of shape [height x width x len(channels)].
        :raises ValueError: if a requested channel is not supported.
        """
        for ch in channels:
            if ch not in self.channel_names:
                raise ValueError(f"unsupported channel: {ch}")

        frame = str(index).zfill(4)
        channel_map = {}
        if any(x in channels for x in "RGBA"):
            # Close the file handle as soon as the pixels are copied out.
            with Image.open(os.path.join(self.render_path, f"{frame}_colors.png")) as img:
                rgba = np.array(img) / 255.0
            channel_map.update(zip("RGBA", rgba.transpose([2, 0, 1])))
        # NOTE: Use "max_depth"?
        if "D" in channels:
            with Image.open(os.path.join(self.depth_path, f"{frame}_depth.png")) as img:
                depth = np.array(img)[:, :, 0]
            # Pixels at the largest depth value of the image are treated as
            # infinitely far away.
            inf_dist = depth == np.max(depth)
            channel_map["D"] = np.where(
                inf_dist,
                np.inf,
                depth.astype(np.float32) / 255.0,
            )

        # The order of channels is user-specified.
        combined = np.stack([channel_map[k] for k in channels], axis=-1)
        h, w, _ = combined.shape
        return self.camera(index, w, h), combined

    def camera(self, index: int, width: int, height: int) -> ProjectiveCamera:
        """Build the ProjectiveCamera of view `index` for an image of the given size."""
        info = self.infos[index]
        return ProjectiveCamera(
            origin=np.array(info["origin"], dtype=np.float32),
            x=np.array(info["x"], dtype=np.float32),
            y=np.array(info["y"], dtype=np.float32),
            z=np.array(info["z"], dtype=np.float32),
            width=width,
            height=height,
            x_fov=info["x_fov"],
            y_fov=info["y_fov"],
        )

In [None]:
import io
import struct
from contextlib import contextmanager
from typing import BinaryIO, Iterator, Optional

import numpy as np


def write_ply(
    raw_f: BinaryIO,
    coords: np.ndarray,
    rgb: Optional[np.ndarray] = None,
    faces: Optional[np.ndarray] = None,
):
    """
    Write a binary little-endian PLY file for a mesh or a point cloud.

    :param raw_f: binary file object to write to.
    :param coords: an [N x 3] array of floating point coordinates.
    :param rgb: an [N x 3] array of vertex colors, in the range [0.0, 1.0].
                Values outside that range are clamped to it.
    :param faces: an [N x 3] array of triangles encoded as integer indices.
    """
    with buffered_writer(raw_f) as f:
        f.write(b"ply\n")
        f.write(b"format binary_little_endian 1.0\n")
        f.write(bytes(f"element vertex {len(coords)}\n", "ascii"))
        f.write(b"property float x\n")
        f.write(b"property float y\n")
        f.write(b"property float z\n")
        if rgb is not None:
            f.write(b"property uchar red\n")
            f.write(b"property uchar green\n")
            f.write(b"property uchar blue\n")
        if faces is not None:
            f.write(bytes(f"element face {len(faces)}\n", "ascii"))
            f.write(b"property list uchar int vertex_index\n")
        f.write(b"end_header\n")

        if rgb is not None:
            # Quantize to bytes. Clipping keeps inputs that stray slightly
            # outside [0, 1] from overflowing uchar and raising struct.error.
            rgb_bytes = np.clip((np.asarray(rgb) * 255.499).round(), 0, 255).astype(int)
            vertex_struct = struct.Struct("<3f3B")
            for xyz, color in zip(coords.tolist(), rgb_bytes.tolist()):
                f.write(vertex_struct.pack(*xyz, *color))
        else:
            vertex_struct = struct.Struct("<3f")
            for xyz in coords.tolist():
                f.write(vertex_struct.pack(*xyz))

        if faces is not None:
            # Signed 32-bit indices, matching the "int" type declared in the
            # header; the leading byte is the vertex count of the face.
            face_struct = struct.Struct("<B3i")
            for tri in faces.tolist():
                f.write(face_struct.pack(len(tri), *tri))


@contextmanager
def buffered_writer(raw_f: BinaryIO) -> Iterator[io.BufferedIOBase]:
    """
    Yield a buffered writer for `raw_f`.

    If `raw_f` is already buffered it is yielded unchanged. Otherwise it is
    wrapped in a temporary `io.BufferedWriter`, which is flushed and detached
    on exit so that the caller's stream stays open and usable.

    :param raw_f: binary file object to write to.
    :return: a context manager yielding the buffered file object.
    """
    if isinstance(raw_f, io.BufferedIOBase):
        yield raw_f
        return

    f = io.BufferedWriter(raw_f)
    try:
        yield f
    finally:
        f.flush()
        # Without detach(), garbage collection of the temporary wrapper
        # would close the caller's underlying stream.
        f.detach()

In [None]:
import random
from collections import defaultdict
from dataclasses import dataclass
from typing import BinaryIO, Dict, List, Optional, Union

import blobfile as bf
import numpy as np

import sys
sys.path.append("/root/dev")
sys.path.append("/root/dev/ShapeNet_rendering/get_colored_pcs")
# from ply_util import write_ply

# Names of the channels that hold color data.
COLORS = frozenset(("R", "G", "B", "A"))


def preprocess(data, channel):
    """
    Prepare one channel's values for export.

    Color channels are multiplied by 255 and rounded; any other channel is
    returned unchanged.
    """
    is_color = channel in COLORS
    return np.round(data * 255.0) if is_color else data


@dataclass
class PointCloud:
    """
    An array of points sampled on a surface. Each point may have zero or more
    channel attributes.

    :param coords: an [N x 3] array of point coordinates.
    :param channels: a dict mapping names to [N] arrays of channel values.
    """

    coords: np.ndarray
    channels: Dict[str, np.ndarray]

    @classmethod
    def from_rgbd(
        cls, vd: "ViewData", num_views: Optional[int] = None, idx: Optional[int] = None
    ) -> "PointCloud":
        """
        Construct a point cloud from the given view data.

        The data must have a depth channel. All other channels will be stored
        in the `channels` attribute of the result.

        Pixels in the rendered views are not converted into points in the cloud
        if they have infinite depth or, when an alpha channel is present, less
        than 1.0 alpha.

        :param vd: view data exposing `channel_names`, `num_views` and
                   `load_view(index, channels)`.
        :param num_views: number of views to load; defaults to `vd.num_views`.
        :param idx: if given, this view index is loaded on every iteration
                    instead of 0..num_views-1, so pass `num_views=1` to get a
                    single copy of that view.
        :return: the points of all loaded views concatenated together.
        :raises ValueError: if the view data has no depth channel.
        """
        channel_names = vd.channel_names
        if "D" not in channel_names:
            raise ValueError("view data must have depth channel")
        depth_index = channel_names.index("D")
        # Alpha is optional; without it only the depth test filters pixels.
        alpha_index = channel_names.index("A") if "A" in channel_names else None

        all_coords = []
        all_channels = defaultdict(list)

        if num_views is None:
            num_views = vd.num_views
        for i in range(num_views):
            view_index = i if idx is None else idx
            camera, channel_values = vd.load_view(view_index, channel_names)
            flat_values = channel_values.reshape([-1, len(channel_names)])

            # Create an array of integer (x, y) image coordinates for Camera methods.
            image_coords = camera.image_coords()

            # Select subset of pixels that have meaningful depth/color.
            image_mask = np.isfinite(flat_values[:, depth_index])
            if alpha_index is not None:
                image_mask = image_mask & (flat_values[:, alpha_index] >= 1 - 1e-5)
            image_coords = image_coords[image_mask]
            flat_values = flat_values[image_mask]

            # Use the depth and camera information to compute the coordinates
            # corresponding to every visible pixel.
            camera_rays = camera.camera_rays(image_coords)
            camera_origins = camera_rays[:, 0]
            camera_directions = camera_rays[:, 1]
            depth_dirs = camera.depth_directions(image_coords)
            # Depth is divided by the projection of each ray onto its depth
            # direction to get the distance to travel along the ray itself.
            ray_scales = flat_values[:, depth_index] / np.sum(
                camera_directions * depth_dirs, axis=-1
            )
            coords = camera_origins + camera_directions * ray_scales[:, None]

            all_coords.append(coords)
            for j, name in enumerate(channel_names):
                if name != "D":
                    all_channels[name].append(flat_values[:, j])

        if len(all_coords) == 0:
            return cls(coords=np.zeros([0, 3], dtype=np.float32), channels={})

        return cls(
            coords=np.concatenate(all_coords, axis=0),
            channels={k: np.concatenate(v, axis=0) for k, v in all_channels.items()},
        )

    @classmethod
    def load(cls, f: Union[str, BinaryIO]) -> "PointCloud":
        """
        Load the point cloud from a .npz file.

        :param f: a path (opened through blobfile) or a binary file object.
        :return: a PointCloud built from the "coords" entry, with every other
                 entry of the archive used as a channel.
        """
        if isinstance(f, str):
            with bf.BlobFile(f, "rb") as reader:
                return cls.load(reader)
        else:
            obj = np.load(f)
            keys = list(obj.keys())
            return PointCloud(
                coords=obj["coords"],
                channels={k: obj[k] for k in keys if k != "coords"},
            )

    def save(self, f: Union[str, BinaryIO]):
        """
        Save the point cloud to a .npz file.

        :param f: a path (opened through blobfile) or a binary file object.
        """
        if isinstance(f, str):
            with bf.BlobFile(f, "wb") as writer:
                self.save(writer)
        else:
            np.savez(f, coords=self.coords, **self.channels)

    def write_ply(self, raw_f: BinaryIO):
        """
        Write the point cloud as a PLY file.

        Vertex colors are included only when R, G and B channels all exist.

        :param raw_f: binary file object to write to.
        """
        write_ply(
            raw_f,
            coords=self.coords,
            rgb=(
                np.stack([self.channels[x] for x in "RGB"], axis=1)
                if all(x in self.channels for x in "RGB")
                else None
            ),
        )

    def random_sample(self, num_points: int, **subsample_kwargs) -> "PointCloud":
        """
        Sample a random subset of this PointCloud.

        :param num_points: maximum number of points to sample.
        :param subsample_kwargs: arguments to self.subsample().
        :return: a reduced PointCloud, or self if num_points is not less than
                 the current number of points.
        """
        if len(self.coords) <= num_points:
            return self
        indices = np.random.choice(len(self.coords), size=(num_points,), replace=False)
        return self.subsample(indices, **subsample_kwargs)

    def farthest_point_sample(
        self, num_points: int, init_idx: Optional[int] = None, **subsample_kwargs
    ) -> "PointCloud":
        """
        Sample a subset of the point cloud that is evenly distributed in space.

        First, a random point is selected. Then each successive point is chosen
        such that it is furthest from the currently selected points.

        The time complexity of this operation is O(NM), where N is the original
        number of points and M is the reduced number. Therefore, performance
        can be improved by randomly subsampling points with random_sample()
        before running farthest_point_sample().

        :param num_points: maximum number of points to sample.
        :param init_idx: if specified, the first point to sample.
        :param subsample_kwargs: arguments to self.subsample().
        :return: a reduced PointCloud, or self if num_points is not less than
                 the current number of points.
        """
        if len(self.coords) <= num_points:
            return self
        init_idx = random.randrange(len(self.coords)) if init_idx is None else init_idx
        indices = np.zeros([num_points], dtype=np.int64)
        indices[0] = init_idx
        sq_norms = np.sum(self.coords**2, axis=-1)

        def compute_dists(idx: int):
            # Utilize equality: ||A-B||^2 = ||A||^2 + ||B||^2 - 2*(A @ B).
            return sq_norms + sq_norms[idx] - 2 * (self.coords @ self.coords[idx])

        cur_dists = compute_dists(init_idx)
        for i in range(1, num_points):
            idx = np.argmax(cur_dists)
            indices[i] = idx

            # Without this line, we may duplicate an index more than once if
            # there are duplicate points, due to rounding errors.
            cur_dists[idx] = -1

            cur_dists = np.minimum(cur_dists, compute_dists(idx))

        return self.subsample(indices, **subsample_kwargs)

    def subsample(self, indices: np.ndarray, average_neighbors: bool = False) -> "PointCloud":
        """
        Keep only the points at the given indices.

        :param indices: indices into self.coords of the points to keep.
        :param average_neighbors: if True, each kept point's channel values
                                  are the mean over all original points whose
                                  nearest kept point it is, instead of the
                                  kept point's own values.
        :return: the reduced PointCloud.
        """
        if not average_neighbors:
            return PointCloud(
                coords=self.coords[indices],
                channels={k: v[indices] for k, v in self.channels.items()},
            )

        new_coords = self.coords[indices]
        neighbor_indices = PointCloud(coords=new_coords, channels={}).nearest_points(self.coords)

        # Make sure every point points to itself, which might not
        # be the case if points are duplicated or there is rounding
        # error.
        neighbor_indices[indices] = np.arange(len(indices))

        new_channels = {}
        for k, v in self.channels.items():
            v_sum = np.zeros_like(v[: len(indices)])
            v_count = np.zeros_like(v[: len(indices)])
            np.add.at(v_sum, neighbor_indices, v)
            np.add.at(v_count, neighbor_indices, 1)
            new_channels[k] = v_sum / v_count
        return PointCloud(coords=new_coords, channels=new_channels)

    def select_channels(self, channel_names: List[str]) -> np.ndarray:
        """
        Stack the named channels into one array.

        :param channel_names: channels to select, in output order.
        :return: an [N x C] array; each channel is passed through
                 preprocess() before stacking.
        """
        data = np.stack([preprocess(self.channels[name], name) for name in channel_names], axis=-1)
        return data

    def nearest_points(self, points: np.ndarray, batch_size: int = 16384) -> np.ndarray:
        """
        For each point in another set of points, compute the point in this
        pointcloud which is closest.

        :param points: an [N x 3] array of points.
        :param batch_size: the number of neighbor distances to compute at once.
                           Smaller values save memory, while larger values may
                           make the computation faster.
        :return: an [N] array of indices into self.coords.
        """
        norms = np.sum(self.coords**2, axis=-1)
        all_indices = []
        for i in range(0, len(points), batch_size):
            batch = points[i : i + batch_size]
            dists = norms + np.sum(batch**2, axis=-1)[:, None] - 2 * (batch @ self.coords.T)
            all_indices.append(np.argmin(dists, axis=-1))
        return np.concatenate(all_indices, axis=0)

    def combine(self, other: "PointCloud") -> "PointCloud":
        """
        Concatenate this point cloud with another one.

        Both clouds must have exactly the same channel names.

        :param other: the cloud whose points are appended after this one's.
        :return: a new PointCloud holding the points of both.
        """
        assert self.channels.keys() == other.channels.keys()
        return PointCloud(
            coords=np.concatenate([self.coords, other.coords], axis=0),
            channels={
                k: np.concatenate([v, other.channels[k]], axis=0) for k, v in self.channels.items()
            },
        )

In [None]:
# origin = [0, 0, 0]
# right_vec, up_vec, forward_vec = right_vec, up_vec, forward_vec
# width = height = 256
# x_fov, y_fov = x_fov, y_fov

# Locate the example scene's inputs. Color and depth images are both read
# from the "img_depth_normal" directory; camera labels come from "cam".
render_path, depth_path, camera_path = (
    os.path.join(DATA_PATH, dir_names[key], EX_SCENE_ID)
    for key in ("img_depth_normal", "img_depth_normal", "cam")
)

vd = Front3DBlenderViewData(
    render_path=render_path,
    depth_path=depth_path,
    camera_path=camera_path,
)

In [None]:
# # 3DGen info.json
# import json

# gen3d_camera_path = "/root/node9/data/shape-generation/shapenetv1/rendered_shapenet_uniform_light/03001627/aaba865e99c23e7082db9fca4b68095/rendered_images"

# with open(os.path.join(gen3d_camera_path, "info.json"), "r") as f:
#     info = json.load(f)
# with open(os.path.join(gen3d_camera_path, "00000.json"), "r") as f:
#     model_info = json.load(f)

# print("[INFO]")
# for k, v in info.items():
#     print("KEY:", k)
#     print("VALUE:", v)
# print("===============")
# print("[MODEL INFO]")
# for k, v in model_info.items():
#     print("KEY:", k)
#     print("VALUE:", v)

In [None]:
# !pip install blobfile

In [None]:
# Fuse the first `num_images` views of the example scene into one point cloud.
num_images = 40
pc = PointCloud.from_rgbd(vd, num_views=num_images)

In [None]:
# Despite its name, `coords` holds the full PointCloud returned by from_rgbd;
# the notes below record intermediate values inspected while debugging it.
coords = PointCloud.from_rgbd(vd, num_views=num_images)
# print(image_coords)
# print(image_mask, np.sum(image_mask)) # [False False False ...  True  True  True] 29717
# print(image_mask_alpha, np.sum(image_mask_alpha)) # [False False False ...  True  True  True] 29717
# print(camera_rays, camera_rays.shape) # (29717, 2, 3) -> Change to camera origin
# print(depth_dirs, depth_dirs.shape) # [[ 0.80217993 -0.3533412   0.4813079 ], ...], (29717, 3)
# print(ray_scales, ray_scales.shape) # (29717,)
# print(np.sum(ray_scales == 0.)) # 318
# print(coords, coords.shape) # point coords, (29717, 3)

In [None]:
# Report the array shape of the coordinates and of every channel.
print("coords", pc.coords.shape)
print("channels")
for name, values in pc.channels.items():
    print("KEY", name, "VALUE", values.shape)

In [None]:
!pip install ipympl

In [None]:
# Select matplotlib's "notebook" backend for the figures that follow.
# NOTE(review): the next plotting cell runs `%matplotlib inline`, which
# presumably replaces this backend -- confirm this cell is still needed.
%matplotlib notebook

In [None]:
# %matplotlib widget
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# plt.ion()
# print("Is interactive?", plt.isinteractive())

num_images = 1
idx = 2
pc = PointCloud.from_rgbd(vd, num_images, idx=idx)

num_pts = 100_000
sampled_pc = pc.random_sample(num_pts)
print("sampled_pc.coords", type(sampled_pc.coords), sampled_pc.coords.shape)
cam_origin = vd.camera(idx, 256, 256).origin[None, :]
print("cam_origin", cam_origin, cam_origin.shape)
# local to global
# right, up, front = vd.camera(idx, 256, 256).x[None, :], vd.camera(idx, 256, 256).y[None, :], vd.camera(idx, 256, 256).z[None, :]
# rotation_matrix = np.stack([right[0], up[0], front[0]], axis=-1)
# transformation_matrix = build_transformation_matrix(cam_origin[0], rotation_matrix)
# cam_origin = local_to_global(cam_origin, transformation_matrix)

# for k in sampled_pc:
#     print(k)
# print(vars(sampled_pc))
# print(sampled_pc.R.shape)
sampled_pc_dict = vars(sampled_pc)
print(type(sampled_pc_dict), sampled_pc_dict.keys())
points = sampled_pc_dict["coords"]
# print(sampled_pc["channels"]["R"].shape)
colors = np.stack([
    sampled_pc_dict["channels"]["R"][:, None], 
    sampled_pc_dict["channels"]["G"][:, None], 
    sampled_pc_dict["channels"]["B"][:, None]
], axis=-1)

fig = plt.figure(figsize=(10, 8))
ax = Axes3D(fig)
# ax = fig.add_subplot(111, projection="3d")
ax.scatter(
    points[:, 0], 
    points[:, 1], 
    points[:, 2], 
    s=1,
    c=colors,
)
ax.scatter(
    cam_origin[:, 0], 
    cam_origin[:, 1], 
    cam_origin[:, 2], 
    s=100,
    c="green",
)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.title("Colored Point Cloud")
plt.show()

In [None]:
# !pip install plotly
# !pip install nbformat==4.2.0

In [None]:
# !pip list | grep nbformat

In [None]:
# !pip install --upgrade nbformat

In [None]:
import numpy as np
import plotly.graph_objects as go

# Single-view reconstruction (view `idx`) shown as an interactive plotly scene.
num_images, idx = 1, 2
pc = PointCloud.from_rgbd(vd, num_images, idx=idx)

num_pts = 100_000
sampled_pc = pc.random_sample(num_pts)
print("sampled_pc.coords", type(sampled_pc.coords), sampled_pc.coords.shape)
cam_origin = vd.camera(idx, 256, 256).origin[None, :]
print("cam_origin", cam_origin, cam_origin.shape)

sampled_pc_dict = vars(sampled_pc)
print(type(sampled_pc_dict), sampled_pc_dict.keys())
points = sampled_pc_dict["coords"]
# Per-point RGB rows, shape (N, 3).
colors = np.stack([sampled_pc_dict["channels"][ch] for ch in "RGB"], axis=-1)
print(colors.shape)

# Trace 1: the colored points. Trace 2: the camera position.
pointcloud = go.Scatter3d(
    x=points[:, 0],
    y=points[:, 1],
    z=points[:, 2],
    mode="markers",
    marker={"size": 2, "color": colors, "opacity": 0.8},
    name="PointCloud",
)
camera_origin = go.Scatter3d(
    x=cam_origin[:, 0],
    y=cam_origin[:, 1],
    z=cam_origin[:, 2],
    mode="markers",
    marker={"size": 10, "color": "green"},
    name="Camera Origin",
)

fig = go.Figure(data=[pointcloud, camera_origin])
fig.update_layout(
    scene={"xaxis_title": "X", "yaxis_title": "Y", "zaxis_title": "Z"},
    title="Interactive 3D Point Cloud",
)
fig.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Fuse the first two views of the example scene into one colored cloud.
num_images = 2
pc = PointCloud.from_rgbd(vd, num_images)

# Cap the number of plotted points.
num_pts = 100_000
sampled_pc = pc.random_sample(num_pts)
print("sampled_pc.coords", type(sampled_pc.coords), sampled_pc.coords.shape)

sampled_pc_dict = vars(sampled_pc)
print(type(sampled_pc_dict), sampled_pc_dict.keys())
points = sampled_pc_dict["coords"]
# One RGB row per point, shape (N, 3): the 2D layout scatter() documents
# for per-point colors.
colors = np.stack([sampled_pc_dict["channels"][ch] for ch in "RGB"], axis=-1)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(
    points[:, 0],
    points[:, 1],
    points[:, 2],
    s=1,
    c=colors,
)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.title("Colored Point Cloud")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def _load_single_view(view_idx, label):
    """Back-project one view, subsample it, and print its camera origin."""
    cloud = PointCloud.from_rgbd(vd, num_images, idx=view_idx)
    sampled = cloud.random_sample(num_pts)
    cam = vd.camera(view_idx, 256, 256)
    origin = cam.origin[np.newaxis, :]
    print(label, origin)
    return cloud, sampled, cam, origin


# Overlay two single-view reconstructions (blue and red) with their cameras.
num_images = 1
num_pts = 100_000

idx1 = 0
pc1, sampled_pc1, cam1, cam1_origin = _load_single_view(idx1, "Cam1:")
x1, y1, z1 = cam1.x, cam1.y, cam1.z

idx2 = 2
pc2, sampled_pc2, cam2, cam2_origin = _load_single_view(idx2, "Cam2:")
x2, y2, z2 = cam2.x, cam2.y, cam2.z

sampled_pc_dict1, sampled_pc_dict2 = vars(sampled_pc1), vars(sampled_pc2)
points1, points2 = sampled_pc_dict1["coords"], sampled_pc_dict2["coords"]
# Per-point RGB values, kept for reference; the plot uses one flat color per view.
colors1 = np.stack([sampled_pc_dict1["channels"][ch][:, None] for ch in "RGB"], axis=-1)
colors2 = np.stack([sampled_pc_dict2["channels"][ch][:, None] for ch in "RGB"], axis=-1)
cams = np.concatenate([cam1_origin, cam2_origin], axis=0)
print(cams)

# Append each camera origin to its own cloud so it is drawn in the view's color too.
points1 = np.concatenate([points1, cam1_origin], axis=0)
points2 = np.concatenate([points2, cam2_origin], axis=0)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
for cloud, shade in ((points1, "blue"), (points2, "red")):
    ax.scatter(cloud[:, 0], cloud[:, 1], cloud[:, 2], s=1, alpha=0.5, c=shade)
# Label each camera position with its view index.
for origin, view_idx in ((cam1_origin, idx1), (cam2_origin, idx2)):
    ax.scatter(
        origin[:, 0],
        origin[:, 1],
        origin[:, 2],
        s=100,
        alpha=1.0,
        marker=f"${view_idx}$",
        c="green",
    )

ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.title("Colored Point Cloud")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def _load_single_view(view_idx, label):
    """Back-project one view, subsample it, and print its camera origin."""
    cloud = PointCloud.from_rgbd(vd, num_images, idx=view_idx)
    sampled = cloud.random_sample(num_pts)
    cam = vd.camera(view_idx, 256, 256)
    origin = cam.origin[np.newaxis, :]
    print(label, origin)
    return cloud, sampled, cam, origin


# Same two-view overlay, but each cloud is first passed through
# local_to_global() using a transform built from its camera pose.
num_images = 1
num_pts = 100_000

idx1 = 0
pc1, sampled_pc1, cam1, cam1_origin = _load_single_view(idx1, "Cam1:")
x1, y1, z1 = cam1.x, cam1.y, cam1.z
# The camera axes become the columns of the rotation matrix.
rotation_matrix1 = np.stack([x1, y1, z1], axis=-1)
transformation_matrix1 = build_transformation_matrix(cam1_origin[0], rotation_matrix1)

idx2 = 2
pc2, sampled_pc2, cam2, cam2_origin = _load_single_view(idx2, "Cam2:")
x2, y2, z2 = cam2.x, cam2.y, cam2.z
rotation_matrix2 = np.stack([x2, y2, z2], axis=-1)
transformation_matrix2 = build_transformation_matrix(cam2_origin[0], rotation_matrix2)

sampled_pc_dict1, sampled_pc_dict2 = vars(sampled_pc1), vars(sampled_pc2)
points1, points2 = sampled_pc_dict1["coords"], sampled_pc_dict2["coords"]
# local to global
points1 = local_to_global(points1, transformation_matrix1)
points2 = local_to_global(points2, transformation_matrix2)
# Per-point RGB values, kept for reference; the plot uses one flat color per view.
colors1 = np.stack([sampled_pc_dict1["channels"][ch][:, None] for ch in "RGB"], axis=-1)
colors2 = np.stack([sampled_pc_dict2["channels"][ch][:, None] for ch in "RGB"], axis=-1)
cams = np.concatenate([cam1_origin, cam2_origin], axis=0)
print(cams)

# Append each camera origin to its own cloud so it is drawn in the view's color too.
points1 = np.concatenate([points1, cam1_origin], axis=0)
points2 = np.concatenate([points2, cam2_origin], axis=0)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
for cloud, shade in ((points1, "blue"), (points2, "red")):
    ax.scatter(cloud[:, 0], cloud[:, 1], cloud[:, 2], s=1, alpha=0.5, c=shade)
# Label each camera position with its view index.
for origin, view_idx in ((cam1_origin, idx1), (cam2_origin, idx2)):
    ax.scatter(
        origin[:, 0],
        origin[:, 1],
        origin[:, 2],
        s=100,
        alpha=1.0,
        marker=f"${view_idx}$",
        c="green",
    )

ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.title("Colored Point Cloud")
plt.show()

In [None]:
# Object data example
import sys
sys.path.append("/root/dev")
sys.path.append("/root/dev/ShapeNet_rendering/get_colored_pcs")
from ShapeNet_rendering.get_colored_pcs.view_data import BlenderViewData

# Rendered views of a single ShapeNet object, used as a reference for the
# scene reconstructions above.
data_path = "/root/data/shape-generation/shapenetv1/rendered_shapenet_uniform_light/03001627/aaba865e99c23e7082db9fca4b68095/rendered_images.zip"
vd_obj = BlenderViewData(data_path)

# Fuse 20 views of the object and cap the number of plotted points.
pc = PointCloud.from_rgbd(vd_obj, 20)
print(type(pc.coords), type(pc.channels))
sampled_pc = pc.random_sample(100_000)
sampled_pc_dict = vars(sampled_pc)
print(type(sampled_pc_dict), sampled_pc_dict.keys())
points = sampled_pc_dict["coords"]
# One RGB row per point, shape (N, 3): the 2D layout scatter() documents
# for per-point colors.
colors = np.stack([sampled_pc_dict["channels"][ch] for ch in "RGB"], axis=-1)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(
    points[:, 0],
    points[:, 1],
    points[:, 2],
    s=1,
    c=colors,
)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.title("Colored Point Cloud")
plt.show()