<a href="https://colab.research.google.com/github/umututku03/3D-Rendering-Optimization/blob/main/3d_rendering_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import numpy as np
from PIL import Image, ImageDraw
import torch
from torch import nn, optim
from math import sin, cos, radians
import matplotlib.pyplot as plt

In [None]:
# Seed both random number generators with the same value so that runs
# of this notebook are repeatable.
SEED = 14
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Cube geometry: eight corner vertices of a cube spanning [-1, 1] on every
# axis, plus the twelve edges given as pairs of indices into cube_vertices.
cube_vertices = np.array([
    # z = -1 face
    [1, 1, -1],
    [-1, 1, -1],
    [-1, -1, -1],
    [1, -1, -1],
    # z = +1 face
    [1, 1, 1],
    [-1, 1, 1],
    [-1, -1, 1],
    [1, -1, 1],
])

cube_edges = [
    # loop around the z = -1 face
    (0, 1), (1, 2), (2, 3), (3, 0),
    # loop around the z = +1 face
    (4, 5), (5, 6), (6, 7), (7, 4),
    # edges joining the two faces
    (0, 4), (1, 5), (2, 6), (3, 7),
]

In [None]:
class LookAt(nn.Module):
    """Differentiable look-at transform producing a 4x4 view matrix.

    The matrix is assembled with ``torch.stack`` / ``torch.cat`` rather than
    by writing into a pre-allocated ``torch.eye(4)``.  In-place writes into a
    tensor whose slice was already consumed by ``matmul`` invalidate the
    tensor saved for backward, which makes ``backward()`` fail as soon as the
    camera position requires gradients.
    """

    def __init__(self):
        super().__init__()

    def forward(self, from_pos, to_pos, up):
        """Build the view matrix of a camera at ``from_pos`` aimed at ``to_pos``.

        Args:
            from_pos: 1-D tensor with 3 elements, the camera position.
            to_pos: 1-D tensor with 3 elements, the point being looked at.
            up: 1-D tensor with 3 elements, approximate up direction. It must
                not be parallel to ``from_pos - to_pos`` (the cross product
                would be zero and the normalisation would divide by zero).

        Returns:
            A 4x4 tensor whose first three rows hold the camera's right, up
            and forward axes followed by the translation ``-R @ from_pos``,
            and whose last row is ``[0, 0, 0, 1]``.  The result has the dtype
            and device of the inputs.
        """
        # The forward axis points from the target towards the camera.
        forward = from_pos - to_pos
        forward = forward / torch.norm(forward)

        # Explicit dim avoids the deprecated implicit-dimension behaviour.
        right = torch.cross(up, forward, dim=-1)
        right = right / torch.norm(right)

        # Re-derive up so that the three axes are mutually orthogonal.
        true_up = torch.cross(forward, right, dim=-1)

        rotation = torch.stack((right, true_up, forward), dim=0)
        translation = -torch.matmul(rotation, from_pos)

        top_rows = torch.cat((rotation, translation.unsqueeze(1)), dim=1)
        bottom_row = top_rows.new_tensor([[0.0, 0.0, 0.0, 1.0]])
        return torch.cat((top_rows, bottom_row), dim=0)

In [None]:
class ProjectVertex(nn.Module):
    """Project one 3-D vertex to 2-D using a view and a projection matrix."""

    def __init__(self, projection_matrix):
        """Store the 4x4 projection matrix applied by every ``forward`` call."""
        super().__init__()
        self.projection_matrix = projection_matrix

    def forward(self, vertex, view_matrix):
        """Return the perspective-divided x/y coordinates of ``vertex``.

        Args:
            vertex: 1-D tensor with 3 elements.
            view_matrix: 4x4 matrix applied to the vertex before projection.

        Returns:
            1-D tensor with 2 elements: the first two projected components
            divided by the fourth (w) component.
        """
        # Append w = 1 to obtain homogeneous coordinates.
        w_one = torch.ones(1)
        homogeneous = torch.cat([vertex, w_one])

        in_camera_space = torch.matmul(view_matrix, homogeneous)
        in_clip_space = torch.matmul(self.projection_matrix, in_camera_space)

        xy = in_clip_space[:2]
        w = in_clip_space[3]
        return xy / w

In [None]:
# Initial camera setup: the world-space up direction and the look-at target.
up_vector = torch.tensor([0.0, 1.0, 0.0], dtype=torch.float32)
target_pos = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)

# Perspective projection parameters.
fov = 60  # Field of view in degrees
aspect_ratio = 1.0  # Aspect ratio
near = 1.0  # Near clipping plane
far = 100.0  # Far clipping plane

# Focal scale derived from the field of view.
fov_rad = np.radians(fov)
f = 1 / np.tan(fov_rad / 2)

# Fill in the non-zero entries of the 4x4 perspective projection matrix.
projection_matrix = torch.zeros(4, 4, dtype=torch.float32)
projection_matrix[0, 0] = f / aspect_ratio
projection_matrix[1, 1] = f
projection_matrix[2, 2] = (far + near) / (near - far)
projection_matrix[2, 3] = (2 * far * near) / (near - far)
projection_matrix[3, 2] = -1

In [None]:
# Instantiate the two rendering stages used by the cells below:
# the projection stage (bound to projection_matrix) and the view stage.
project_vertex = ProjectVertex(projection_matrix)
look_at = LookAt()

In [None]:
# Read the per-step camera extrinsics and 2D points written by the earlier
# NumPy rendering run; each text file becomes one float32 tensor.
num_steps = 100
extrinsic_dir = "/kaggle/input/3d-rendering-engine-numpy-params/extrinsics"
points_dir = "/kaggle/input/3d-rendering-computed-2d-points-gt/computed_2d_points"

numpy_extrinsics = []
for step in range(num_steps):
    extrinsic_path = os.path.join(extrinsic_dir, f"extrinsic_{step}.txt")
    numpy_extrinsics.append(torch.tensor(np.loadtxt(extrinsic_path), dtype=torch.float32))

numpy_2d_points = []
for step in range(num_steps):
    points_path = os.path.join(points_dir, f"points_{step}.txt")
    numpy_2d_points.append(torch.tensor(np.loadtxt(points_path), dtype=torch.float32))

In [None]:
# Leaf tensors that the optimizer is allowed to update: the camera position
# and the point the camera looks at.
camera_pos = torch.tensor([7.0, 1.0, 1.0], dtype=torch.float32).requires_grad_(True)
target_pos = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32).requires_grad_(True)

In [None]:
# Adam optimizer over the two trainable tensors.
learning_rate = 0.01
trainable_params = [camera_pos, target_pos]
optimizer = optim.Adam(trainable_params, lr=learning_rate)

In [None]:
# Mean-squared-error criterion comparing projected points with the
# reference 2D points (mean reduction, which is the MSELoss default).
mse_loss = nn.MSELoss(reduction="mean")

In [None]:
# Gradient descent optimization loop.
#
# For every orbit step the camera is placed on a circle around target_pos,
# the cube is projected to pixel coordinates, and the result is compared with
# the reference 2D points.  Everything between target_pos and the loss stays
# on the autograd graph:
#   * the orbit camera is derived from target_pos with tensor arithmetic
#     (re-wrapping it in torch.tensor(...) would detach it), and
#   * the pixel coordinates are kept as floats (casting to int would drop the
#     gradient and make backward() fail).
num_iterations = 1000
image_size = 500  # pixel width/height used to map [-1, 1] to pixel coordinates
orbit_radius = 5
orbit_height = -3.5  # camera offset along y relative to the target

# Loop invariants, built once instead of on every iteration.
vertex_tensors = [torch.tensor(vertex, dtype=torch.float32) for vertex in cube_vertices]
orbit_offsets = []
for step in range(num_steps):
    alpha = step * 2 * np.pi / num_steps
    orbit_offsets.append(torch.tensor(
        [orbit_radius * cos(alpha), orbit_height, orbit_radius * sin(alpha)],
        dtype=torch.float32,
    ))

for iteration in range(num_iterations):
    optimizer.zero_grad()
    total_loss = 0
    for step in range(num_steps):
        # Use a separate name so the module-level camera_pos parameter that
        # was handed to the optimizer is not rebound.
        orbit_camera_pos = target_pos + orbit_offsets[step]

        view_matrix = look_at(orbit_camera_pos, target_pos, up_vector)
        projected_points = torch.stack([
            image_size / 2 * (project_vertex(v, view_matrix) + 1)
            for v in vertex_tensors
        ])

        # assumes numpy_2d_points[step] has one (x, y) row per cube vertex
        # -- TODO confirm against the files that were loaded
        total_loss = total_loss + mse_loss(projected_points, numpy_2d_points[step])

    total_loss.backward()
    optimizer.step()

    if iteration % 100 == 0:
        print(f"Iteration {iteration}, Loss: {total_loss.item()}")

# Save the optimized parameters (detached so no autograd state is pickled).
# The orbit camera is fully determined by target_pos; camera_pos is the
# module-level camera parameter.
torch.save({'camera_pos': camera_pos.detach(), 'target_pos': target_pos.detach()}, 'optimized_params.pth')

In [None]:
# Evaluate the optimized result: re-render every orbit step with the current
# target_pos and measure the per-vertex pixel distance to the reference points.
#
# The camera for each step is built explicitly from target_pos instead of
# being written into camera_pos element by element: in-place assignment to a
# leaf tensor that requires grad raises a RuntimeError.  No gradients are
# needed here, so the whole pass runs under torch.no_grad().
errors = []
with torch.no_grad():
    for step in range(num_steps):
        alpha = step * 2 * np.pi / num_steps
        eval_camera_pos = torch.stack([
            target_pos[0] + 5 * cos(alpha),  # x-coordinate
            target_pos[1] - 3.5,             # y-coordinate (same height as in training)
            target_pos[2] + 5 * sin(alpha),  # z-coordinate
        ])

        view_matrix = look_at(eval_camera_pos, target_pos, up_vector)
        projected_points = []
        for vertex in cube_vertices:
            v = torch.tensor(vertex, dtype=torch.float32)
            p = project_vertex(v, view_matrix)
            # Map [-1, 1] to a 500-pixel image and truncate to whole pixels.
            p = (500 / 2 * (p + 1)).int()
            projected_points.append(p)

        projected_points = torch.stack(projected_points)
        # Euclidean distance per vertex between rendered and reference points.
        error = torch.norm(projected_points.float() - numpy_2d_points[step], dim=1)
        errors.append(error)

# One flat array holding every per-vertex error of every step.
errors = torch.cat(errors).detach().numpy()
mean_error = np.mean(errors)
max_error = np.max(errors)

print(f'Errors: {errors}')
print(f'Mean Error: {mean_error}')
print(f'Max Error: {max_error}')

In [None]:
# Draw the collected error values as a line plot using explicit
# figure/axes handles.
fig, ax = plt.subplots()
ax.plot(errors)
ax.set_title('Errors for Each Frame')
ax.set_xlabel('Frame')
ax.set_ylabel('Error')
plt.show()