# Setup

In [1]:
import os

In [2]:
# os.chdir('..')

In [3]:
os.getcwd()

'c:\\Users\\ay011\\PycharmProjects\\NeRF'

In [4]:
import yaml
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load the environment settings. The context manager closes the file handle
# instead of leaving it open for the lifetime of the kernel.
with open('./env.yml') as env_file:
    env = yaml.safe_load(env_file)

# CUDA runs out of memory while debugging, so this notebook stays on the CPU.
# TODO: also manage the device choice through utils.get_device
# device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

# torch.set_default_tensor_type(torch.cuda.FloatTensor)
torch.set_default_tensor_type(torch.FloatTensor)

# Publish the device name through the process environment for code that reads
# os.environ['device'].
os.environ['device'] = device

# Convenience handles on the sub-sections of the `args` mapping.
args = env['args']
args_render = args['rendering']
args_model = args['model']
args_blender = args['blender']

# Keep only the section of config.yml that matches the selected dataset.
with open('./config.yml') as config_file:
    config = yaml.safe_load(config_file)[args['dataset']]

In [6]:
# NOTE: a `global` statement at module (cell) level has no effect; `args_render`
# is already a module-level name after the configuration cell.
global args_render

In [7]:
args

{'dataset': 'llff',
 'datadir': './dataset/nerf_llff_data/fern',
 'rendering': {'multires': 10,
  'multires_views': 4,
  'raw_noise_std': 0.0,
  'use_viewdirs': False,
  'N_samples': 64,
  'N_importance': 0,
  'perturb': 1.0,
  'i_embed': 0,
  'render_test': True,
  'render_only': False,
  'render_factor': 0},
 'model': {'netdepth': 8,
  'netwidth': 256,
  'netdepth_fine': 8,
  'netwidth_fine': 256,
  'N_rand': 4096,
  'lrate': 0.0005,
  'lrate_decay': 250,
  'lrate_schedule_gamma': 0.1,
  'N_iters': 1,
  'chunk': 10,
  'netchunk': 20,
  'no_batching': True,
  'no_reload': True,
  'ft_path': 'None',
  'random_seed': 'None,',
  'precrop_iters': 0,
  'precrop_frac': 0.5},
 'blender': {'white_bkgd': True}}

In [8]:
def logger(func):
    """Decorator that prints a call's arguments and logs the call to a text file.

    Every call to the decorated function:
      * prints the positional and keyword arguments,
      * overwrites ``logs/debug/<func.__name__>.txt`` (relative to the current
        working directory) with the arguments, and
      * appends the returned value(s) once the function has finished.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result
        unchanged. Function metadata (``__name__``, ``__doc__``, ...) is copied
        from ``func``.
    """
    import functools  # local import keeps this cell self-contained

    log_path = os.path.join('logs', 'debug', f'{func.__name__}.txt')
    rule = "=" * 50

    @functools.wraps(func)
    def wrap(*args, **kwargs):
        print(args)
        print(kwargs)

        # The log directory is not guaranteed to exist on a fresh checkout.
        os.makedirs(os.path.dirname(log_path), exist_ok=True)

        # Write the inputs first and close the file, so they are on disk even
        # if the wrapped call raises.
        with open(log_path, 'w') as f:
            f.write(f'{rule}Args')
            for v in args:
                f.write(f'\n\t{v}')

            f.write(f'\n{rule}Kwargs{rule}')
            for k, v in kwargs.items():
                f.write(f'\n\t{k}:{v}')

        res = func(*args, **kwargs)

        # Log what the function returned (the original re-logged the inputs).
        # A tuple result is written one element per line.
        returned = res if isinstance(res, tuple) else (res,)
        with open(log_path, 'a') as f:
            f.write(f'\n{rule}Return{rule}')
            for v in returned:
                f.write(f'\n\t{v}')
        return res

    return wrap

# Data

## Function _load_data

In [9]:
from src.load_llff import _load_data

In [10]:
@logger
def internal_load_data_log(*args, **kwargs):
    """Logged pass-through to `_load_data`; returns its result unchanged."""
    return _load_data(*args, **kwargs)

In [11]:
poses, bds, imgs = internal_load_data_log(args['datadir'])

('./dataset/nerf_llff_data/fern',)
{}
Loaded image data (3024, 4032, 3, 20) [3024.         4032.         3260.52633288]


poses : (3, 5, Img num) -> (Img num, 3, 5)

Correct rotation matrix ordering and move variable dim to axis 0

In [12]:
# Rebuild axis 1 as: column 1, negated column 0, then the remaining columns unchanged.
poses = np.concatenate(
    (poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]),
    axis=1,
)
poses.shape

(3, 5, 20)

In [13]:
# Bring the last axis to the front, then cast to float32.
poses = np.moveaxis(poses, source=-1, destination=0)
poses = poses.astype(np.float32)
poses.shape

(20, 3, 5)

imgs : (Img num, 3024, 4032, C)

In [14]:
# Bring the last axis to the front, then cast to float32.
imgs = np.moveaxis(imgs, source=-1, destination=0)
imgs = imgs.astype(np.float32)
imgs.shape

(20, 3024, 4032, 3)

bds : (Img num, 2)

In [15]:
# Bring the last axis to the front, then cast to float32.
bds = np.moveaxis(bds, source=-1, destination=0)
bds = bds.astype(np.float32)
bds.shape

(20, 2)

## Function : Recenter pose

recenter_poses를 거쳐도 poses의 구조(shape)는 (Img num, 3, 5) 그대로이고, 값만 바뀌는 것으로 보인다.

In [16]:
from src.load_llff import recenter_poses

In [17]:
@logger
def recenter_poses_log(*args, **kwargs):
    """Logged pass-through to `recenter_poses`; returns its result unchanged."""
    return recenter_poses(*args, **kwargs)

In [18]:
poses = recenter_poses_log(poses)

(array([[[ 9.89630759e-01, -2.24225149e-02, -1.41874000e-01,
         -3.67917895e+00,  3.02400000e+03],
        [-2.72090789e-02, -9.99121010e-01, -3.18883993e-02,
         -1.60379159e+00,  4.03200000e+03],
        [-1.41034275e-01,  3.54180038e-02, -9.89370942e-01,
         -2.76802063e-01,  3.26052637e+03]],

       [[ 9.96017277e-01, -9.11767501e-03, -8.86931792e-02,
         -1.65886891e+00,  3.02400000e+03],
        [-1.19412709e-02, -9.99436915e-01, -3.13572139e-02,
         -1.60914540e+00,  4.03200000e+03],
        [-8.83573368e-02,  3.22914347e-02, -9.95565295e-01,
         -2.42866620e-01,  3.26052637e+03]],

       [[ 9.98846710e-01, -2.10212101e-03, -4.79670279e-02,
          2.71888852e-01,  3.02400000e+03],
        [-2.82207131e-03, -9.99884307e-01, -1.49464915e-02,
         -1.46625757e+00,  4.03200000e+03],
        [-4.79300618e-02,  1.50646204e-02, -9.98737097e-01,
         -1.55975223e-01,  3.26052637e+03]],

       [[ 9.98899817e-01,  3.77176155e-04,  4.68940847e-0

## Function : poses_avg

In [19]:
from src.load_llff import poses_avg, normalize, viewmatrix

In [20]:
@logger
def poses_avg_log(*args, **kwargs):
    """Logged pass-through to `poses_avg`; returns its result unchanged."""
    return poses_avg(*args, **kwargs)

In [21]:
poses.shape

(20, 3, 5)

In [22]:
poses[5, :3, -1]

array([3024.    , 4032.    , 3260.5264], dtype=float32)

In [23]:
# Step-by-step version of the averaging, using the helpers imported from src.load_llff.
center = poses[:, :3, 3].mean(axis=0)            # mean of column 3 over all images
up = poses[:, :3, 1].sum(axis=0)                 # summed column 1
vec2 = normalize(poses[:, :3, 2].sum(axis=0))    # normalized, summed column 2

# The last column of the first pose is kept as a (3, 1) slice so it can be appended.
hwf = poses[0, :3, -1:]
c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], axis=1)

c2w : (3,5), Camera to world


In [24]:
c2w.shape

(3, 5)

In [25]:
c2w

array([[ 1.0000000e+00,  0.0000000e+00,  0.0000000e+00, -2.3841858e-08,
         3.0240000e+03],
       [ 0.0000000e+00,  1.0000000e+00, -1.8730975e-09, -8.3446501e-08,
         4.0320000e+03],
       [-0.0000000e+00,  1.8730975e-09,  1.0000000e+00, -2.9802323e-09,
         3.2605264e+03]], dtype=float32)

## Function : load_llff_data

In [26]:
from src.load_llff import load_llff_data

In [27]:
@logger
def load_llff_data_log(*args, **kwargs):
    """Logged pass-through to `load_llff_data`; returns its result unchanged."""
    return load_llff_data(*args, **kwargs)

In [28]:
# Load the scene with the options taken from the dataset section of config.yml.
images, poses, bds, render_poses, i_test = load_llff_data_log(
    args['datadir'],
    config['factor'],
    recenter=config['recenter'],
    bd_factor=config['bd_factor'],
    spherify=config['spherify'],
)

('./dataset/nerf_llff_data/fern', 8)
{'recenter': True, 'bd_factor': 0.75, 'spherify': True}
Loaded image data (378, 504, 3, 20) [378.         504.         407.56579161]
Loaded ./dataset/nerf_llff_data/fern 16.985296178676084 80.00209740336334
Data:
(20, 3, 5) (20, 378, 504, 3) (20, 2)
HOLDOUT view is 12


- Images: (Img num, H, W, C)

In [29]:
images.shape

(20, 378, 504, 3)

- poses : (Img num, 3, 5)

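여기에서 5는 페이퍼에서 말한 5D 입력(3D location + 2D viewing direction)이 아니라 pose 행렬의 열(column) 개수이다. 아래 사용 예시를 보면 열 1은 up, 열 2는 ray direction, 열 3은 ray origin(카메라 위치)으로 쓰이고, 마지막 열에는 H, W, f 값이 들어 있다.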

In [30]:
poses.shape

(20, 3, 5)

- bds : (Img num, 2)

In [31]:
bds.shape

(20, 2)

- render_poses: (120, 3, 5)

In [32]:
render_poses.shape

(120, 3, 5)

## Function : load_data

In [33]:
def load_data(dataset):
    """Load an LLFF scene and derive the index split and the near/far bounds.

    Reads the module-level ``args``, ``config`` and ``env`` mappings.

    Args:
        dataset: Dataset type name. Only ``'llff'`` is handled.

    Returns:
        For ``'llff'``: the tuple ``(images, i_train, i_val, i_test, hwf, K,
        poses, render_poses, near, far)``. ``K`` is always ``None`` here,
        ``hwf`` is the last column of the first pose, and ``poses`` is cut down
        to its first four columns. For any other value a message is printed and
        ``None`` is returned.
    """
    if dataset != 'llff':
        # Report the value that was actually passed in, not the configured one.
        print('Unknown dataset type', dataset, 'exiting')
        return None

    verbose = env['VERBOSE']
    K = None

    images, poses, bds, render_poses, i_test = load_llff_data(
        args['datadir'],
        config['factor'],
        recenter=config['recenter'],
        bd_factor=config['bd_factor'],
        spherify=config['spherify'],
    )
    hwf = poses[0, :3, -1]
    poses = poses[:, :3, :4]
    if verbose:
        print('Loaded llff', images.shape, render_poses.shape, hwf, args['datadir'])
    if not isinstance(i_test, list):
        i_test = [i_test]

    # A positive llffhold overrides the loader's holdout with every N-th image.
    if config['llffhold'] > 0:
        if verbose:
            print('Auto LLFF holdout,', config['llffhold'])
        i_test = np.arange(images.shape[0])[::config['llffhold']]

    # Validation reuses the test indices, so excluding i_test is sufficient.
    i_val = i_test
    i_train = np.array([i for i in np.arange(int(images.shape[0])) if i not in i_test])

    if verbose:
        print('DEFINING BOUNDS')
    if config['no_ndc']:
        # TODO:  original code used tf so that both var type was tf.Tensor and temporarily using numpy ndarray.
        # near = tf.reduce_min(bds) * .9
        # far = tf.reduce_max(bds) * 1.
        near = np.min(bds) * .9
        far = np.max(bds) * 1.
    else:
        near = 0.
        far = 1.
    if verbose:
        print('NEAR FAR', near, far)
    return images, i_train, i_val, i_test, hwf, K, poses, render_poses, near, far

In [34]:
@logger
def load_data_log(*args, **kwargs):
    """Logged pass-through to `load_data`; returns its result unchanged."""
    return load_data(*args, **kwargs)

In [35]:
images, i_train, i_val, i_test, hwf, K, poses, render_poses, near, far = load_data_log(args['dataset'])

('llff',)
{}
Loaded image data (378, 504, 3, 20) [378.         504.         407.56579161]
Loaded ./dataset/nerf_llff_data/fern 16.985296178676084 80.00209740336334
Data:
(20, 3, 5) (20, 378, 504, 3) (20, 2)
HOLDOUT view is 12


hwf :  정말 이미지의 H, W, f를 순서대로 가짐

In [36]:
hwf

array([378.    , 504.    , 407.5658], dtype=float32)

K

In [37]:
poses.shape

(20, 3, 4)

## [***]POSE

![img](./assets/pipeline-02-typical_perspective_model.PNG)

결과적으로 pose를 이해해야 한다.

(20, 3, 5)에서 3은 3차원 공간에서의 좌표를 말하는 것이고 20은 img의 개수라고 보았을 때 5개의 값이 무엇을 의미하는지 알아야 한다!

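0 : 카메라의 x축 방향 벡터로 보임 (회전 행렬의 첫 번째 열, 확인 필요)

1 : zenith vector (up을 말하는 것으로 사용됨, `poses[:, :3, 1]`)

2 : d vector (페이퍼에서 말하는 direction을 나타내는 것으로, 카메라 좌표계의 z축에 해당한다. `rays_d = poses[:, :3, 2:3]`)

3 : o vector (origin, 즉 카메라의 위치를 말한다. `rays_o = poses[:, :3, 3:4]`이고, 모든 이미지에 대해 평균을 내면 center가 된다!)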

4(-1) : H,W,f를 가지고 있다.

In [38]:
# Example of how the pose columns are used (cf. spherify_poses):
# column 2 is taken as the ray direction, column 3 as the ray origin.
rays_d, rays_o = poses[:, :3, 2:3], poses[:, :3, 3:4]

In [39]:
# Example of how the pose columns are used (cf. poses_avg).
up = poses[:, :3, 1].sum(axis=0)
vec2 = normalize(poses[:, :3, 2].sum(axis=0))
center = poses[:, :3, 3].mean(axis=0)

In [40]:
poses.shape

(20, 3, 4)

# Preprocess

## function Cast intrinsic

![img](./assets/pipeline-02-typical_perspective_model.PNG)

In [41]:
def cast_intrinsics(poses, hwf, i_test, render_poses, K):
    """Convert the image size to integers and build a default intrinsic matrix.

    Args:
        poses, i_test, render_poses: Accepted but not used by this function.
        hwf: Three values unpacked as (height, width, focal).
        K: An existing intrinsic matrix, or None to build one from ``hwf``.

    Returns:
        ``(H, W, hwf, focal, K)`` where ``H`` and ``W`` are ints, ``hwf`` is the
        list ``[H, W, focal]``, and ``K`` is either the value passed in or a 3x3
        array with ``focal`` on the diagonal and ``(0.5*W, 0.5*H)`` in the last
        column.
    """
    height, width, focal = hwf
    H = int(height)
    W = int(width)

    if K is None:
        K = np.array([
            [focal, 0, 0.5 * W],
            [0, focal, 0.5 * H],
            [0, 0, 1],
        ])

    return H, W, [H, W, focal], focal, K

In [42]:
@logger
def cast_intrinsics_log(*args, **kwargs):
    """Logged pass-through to `cast_intrinsics`; returns its result unchanged."""
    return cast_intrinsics(*args, **kwargs)

In [43]:
H, W, hwf, focal, K = cast_intrinsics_log(poses, hwf, i_test, render_poses, K)

(array([[[ 8.78954172e-01, -4.66176510e-01, -1.00593865e-01,
         -1.32555887e-01],
        [ 4.68626022e-01,  8.83396268e-01,  8.16544401e-04,
         -6.97950041e-03],
        [ 8.84835944e-02, -4.78586070e-02,  9.94927227e-01,
          1.00994658e+00]],

       [[ 8.89305353e-01, -4.54287022e-01, -5.25301248e-02,
         -7.66615495e-02],
        [ 4.55913961e-01,  8.89695764e-01,  2.41654422e-02,
          2.09218860e-02],
        [ 3.57577913e-02, -4.54396755e-02,  9.98326898e-01,
          1.00560606e+00]],

       [[ 8.94239604e-01, -4.47504848e-01, -8.66433233e-03,
         -2.12724563e-02],
        [ 4.47564781e-01,  8.93823624e-01,  2.76603047e-02,
          4.34193164e-02],
        [-4.63372655e-03, -2.86127888e-02,  9.99579906e-01,
          9.99835312e-01]],

       [[ 8.92048776e-01, -4.45129722e-01,  7.81578198e-02,
          5.64301908e-02],
        [ 4.40903425e-01,  8.95137429e-01,  6.58275709e-02,
          7.57788867e-02],
        [-9.92637798e-02, -2.4261351

In [44]:
K

array([[407.5657959,   0.       , 252.       ],
       [  0.       , 407.5657959, 189.       ],
       [  0.       ,   0.       ,   1.       ]])

# Model

Function : batchify_rays 
	 Argument : ('rays_flat', 'chunk', 'kwargs', 'i', 'ret', 'k')
Function : render_rays 
	 Argument : ('ray_batch', 'network_fn', 'network_query_fn', 'N_samples', 'retraw', 'lindisp', 'perturb', 'N_importance', 'network_fine', 'white_bkgd', 'raw_noise_std', 'verbose', 'pytest', 'N_rays', 'rays_o', 'rays_d', 'viewdirs', 'bounds', 'near', 'far', 't_vals', 'z_vals', 'mids', 'upper', 'lower', 't_rand', 'pts', 'raw', 'rgb_map', 'disp_map', 'acc_map', 'weights', 'depth_map', 'rgb_map_0', 'disp_map_0', 'acc_map_0', 'z_vals_mid', 'z_samples', '_', 'run_fn', 'ret', 'k')

## Function create nerf model

In [45]:
from src.model_functional import create_nerf, run_network
from src.rendering import get_render_kwargs
from torch.optim import Adam

In [46]:
def create_nerf_model(near, far):
    """Build the NeRF networks, their optimizer, and the render keyword dicts.

    Reads the module-level ``args``, ``args_render``, ``args_model``,
    ``args_blender``, ``config`` and ``env`` mappings.

    Args:
        near: Near bound stored under ``'near'`` in both render kwarg dicts.
        far: Far bound stored under ``'far'`` in both render kwarg dicts.

    Returns:
        ``(render_kwargs_train, render_kwargs_test, optimizer)``.
    """
    models, embed_fn, embeddirs_fn = create_nerf(
        args_render['multires'],
        args_render['multires_views'],
        args_render['i_embed'],
        args_render['use_viewdirs'],
        args_model['netdepth'],
        args_model['netdepth_fine'],
        args_model['netwidth'],
        args_model['netwidth_fine'],
        args_render['N_importance'],
    )

    # Collect the parameters of every network that was actually created.
    grad_vars = [
        param
        for net in models.values()
        if net is not None
        for param in net.parameters()
    ]
    optimizer = Adam(grad_vars, lr=args_model['lrate'], betas=(0.9, 0.999))

    render_kwargs_train, render_kwargs_test = get_render_kwargs(
        args_render['perturb'],
        args_render['N_importance'],
        models['model_fine'],
        args_render['N_samples'],
        models['model'],
        args_render['use_viewdirs'],
        args_blender['white_bkgd'],
        args_render['raw_noise_std'],
        config['no_ndc'],
        config['lindisp'],
        args['dataset'],
    )

    # Closure that binds the embedders and the network chunk size to run_network.
    network_query_fn = lambda inputs, viewdirs, network_fn: run_network(
        inputs,
        viewdirs,
        network_fn,
        embed_fn=embed_fn,
        embeddirs_fn=embeddirs_fn,
        netchunk=args_model['netchunk'],
    )

    # bds_dict = {
    #     'near': tf.cast(near, tf.float32),
    #     'far': tf.cast(far, tf.float32),
    # }
    # NOTE(review): the query function is stored only in the train kwargs;
    # confirm whether render_kwargs_test is expected to carry it as well.
    render_kwargs_train['network_query_fn'] = network_query_fn
    for render_kwargs in (render_kwargs_train, render_kwargs_test):
        render_kwargs.update({'near': near, 'far': far})

    if env['VERBOSE']:
        print('Render Train args : ', render_kwargs_train)
        print('Render Test args : ', render_kwargs_test)

    return render_kwargs_train, render_kwargs_test, optimizer

In [47]:
@logger
def create_nerf_model_log(*args, **kwargs):
    """Logged pass-through to `create_nerf_model`; returns its result unchanged."""
    return create_nerf_model(*args, **kwargs)

In [48]:
render_kwargs_train, render_kwargs_test, optimizer = create_nerf_model_log(near, far)

(0.4737630307674408, 2.4794018268585205)
{}
Not ndc!


In [49]:
render_kwargs_train

{'network_query_fn': <function __main__.create_nerf_model.<locals>.<lambda>(inputs, viewdirs, network_fn)>,
 'perturb': 1.0,
 'N_importance': 0,
 'network_fine': None,
 'N_samples': 64,
 'network_fn': NeRF(
   (dense_layers): ModuleDict(
     (Dense_0): Dense(
       (act): ReLU()
       (w): Linear(in_features=63, out_features=256, bias=True)
     )
     (Dense_1): Dense(
       (act): ReLU()
       (w): Linear(in_features=256, out_features=256, bias=True)
     )
     (Dense_2): Dense(
       (act): ReLU()
       (w): Linear(in_features=256, out_features=256, bias=True)
     )
     (Dense_3): Dense(
       (act): ReLU()
       (w): Linear(in_features=256, out_features=256, bias=True)
     )
     (Dense_4): Dense(
       (act): ReLU()
       (w): Linear(in_features=256, out_features=256, bias=True)
     )
     (Dense_5): Dense(
       (act): ReLU()
       (w): Linear(in_features=319, out_features=256, bias=True)
     )
     (Dense_6): Dense(
       (act): ReLU()
       (w): Linear(in_f

# Train

In [50]:
# Mirrors line 200 of main.py: draw one training image at random,
# then take its pixels as a tensor on `device` and its pose block.
img_i = np.random.choice(i_train)
target = torch.Tensor(images[img_i]).to(device)
pose = poses[img_i, :3, :4]

In [51]:
img_i

5

In [52]:
target.shape

torch.Size([378, 504, 3])

In [53]:
pose.shape

(3, 4)

## [***]Function : get_rays

내부 연산

In [54]:
# from src.ray import get_rays_np, get_rays

def get_rays(H, W, K, c2w):
    """Compute one ray origin and one ray direction per pixel.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        K: 3x3 intrinsics indexable as ``K[row][col]``; ``K[0][0]``/``K[1][1]``
            are used as focal lengths and ``K[0][2]``/``K[1][2]`` as the
            principal point.
        c2w: torch.Tensor whose ``[:3, :3]`` block rotates camera-frame
            directions and whose last column is used as the ray origin.

    Returns:
        ``(rays_o, rays_d)``, each of shape ``(H, W, 3)``. ``rays_o`` is the
        last column of ``c2w`` expanded (not copied) to that shape.
    """
    # indexing='xy' returns (H, W) grids directly. This matches the previous
    # default ('ij') followed by a transpose, and passing `indexing` explicitly
    # avoids the warning torch.meshgrid emits when it is omitted.
    i, j = torch.meshgrid(
        torch.linspace(0, W - 1, W),
        torch.linspace(0, H - 1, H),
        indexing='xy',
    )
    dirs = torch.stack([(i - K[0][2]) / K[0][0], -(j - K[1][2]) / K[1][1], -torch.ones_like(i)], -1)
    # Follow the pose's device so the product below cannot mix devices.
    dirs = dirs.to(c2w.device)
    # Rotate ray directions from camera frame to the world frame
    rays_d = torch.sum(dirs.unsqueeze(-2) * c2w[:3, :3], -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
    # Translate camera frame's origin to the world frame. It is the origin of all rays.
    rays_o = c2w[:3, -1].expand(rays_d.shape)
    return rays_o, rays_d


In [55]:
@logger
def get_rays_log(*args, **kwargs):
    """Logged pass-through to `get_rays`; returns its result unchanged."""
    return get_rays(*args, **kwargs)

In [56]:
print(H, W)

378 504


In [57]:
# intrinsic matrix
K

array([[407.5657959,   0.       , 252.       ],
       [  0.       , 407.5657959, 189.       ],
       [  0.       ,   0.       ,   1.       ]])

In [58]:
# c2w in get_rays
c2w_tmp = torch.Tensor(pose)
c2w_tmp

tensor([[ 0.8937, -0.4401,  0.0872,  0.1188],
        [ 0.4365,  0.8978,  0.0576,  0.0695],
        [-0.1036, -0.0134,  0.9945,  0.9865]])

In [59]:
c2w_tmp.device

device(type='cpu')

In [60]:
# Pass indexing='ij' explicitly (the historical default): torch.meshgrid warns when it is omitted.
# The 'ij' grids have shape (W, H); transposing gives (H, W) image layout.
i, j = torch.meshgrid(torch.linspace(0, W-1, W), torch.linspace(0, H-1, H), indexing='ij')
i = i.t()
j = j.t()
print(i.shape, '\n', j.shape)

torch.Size([378, 504]) 
 torch.Size([378, 504])


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [61]:
# directions = [i, j]^T - [K[0][2], K[1][2]]^T
dirs = torch.stack([(i-K[0][2])/K[0][0], -(j-K[1][2])/K[1][1], -torch.ones_like(i)], -1)
dirs.shape

torch.Size([378, 504, 3])

In [62]:
print(dirs[..., np.newaxis, :].shape)
print(c2w_tmp[:3,:3].shape)
rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w_tmp[:3,:3], -1)
print(rays_d.shape)

torch.Size([378, 504, 1, 3])
torch.Size([3, 3])
torch.Size([378, 504, 3])


In [63]:
print(c2w_tmp)
print(c2w_tmp[:3, -1])
rays_o = c2w_tmp[:3,-1].expand(rays_d.shape)
rays_o.shape

tensor([[ 0.8937, -0.4401,  0.0872,  0.1188],
        [ 0.4365,  0.8978,  0.0576,  0.0695],
        [-0.1036, -0.0134,  0.9945,  0.9865]])
tensor([0.1188, 0.0695, 0.9865])


torch.Size([378, 504, 3])

최종구현

In [64]:
rays_o, rays_d = get_rays_log(H, W, K, torch.Tensor(pose))

(378, 504, array([[407.5657959,   0.       , 252.       ],
       [  0.       , 407.5657959, 189.       ],
       [  0.       ,   0.       ,   1.       ]]), tensor([[ 0.8937, -0.4401,  0.0872,  0.1188],
        [ 0.4365,  0.8978,  0.0576,  0.0695],
        [-0.1036, -0.0134,  0.9945,  0.9865]]))
{}


In [65]:
rays_o.shape

torch.Size([378, 504, 3])

In [66]:
rays_d.shape

torch.Size([378, 504, 3])

## Function : render

In [67]:
from src.rendering import render

In [68]:
@logger
def render_log(*args, **kwargs):
    """Logged pass-through to `render`; returns its result unchanged."""
    return render(*args, **kwargs)

In [69]:
batch_rays = torch.stack([rays_o, rays_d], 0) 
batch_rays.shape

torch.Size([2, 378, 504, 3])

In [70]:
# `i` at this point is the (H, W) pixel-column grid left over from the meshgrid
# cell, not a training-iteration counter, so `verbose=i < 10` passed a boolean
# tensor. This cell stands in for the first iteration, so verbose is simply True.
rgb, disp, acc, extras = render_log(H, W, K, chunk=args_model['chunk'], rays=batch_rays,
                                                verbose=True, retraw=True,
                                                **render_kwargs_train)

(378, 504, array([[407.5657959,   0.       , 252.       ],
       [  0.       , 407.5657959, 189.       ],
       [  0.       ,   0.       ,   1.       ]]))
{'chunk': 10, 'rays': tensor([[[[ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          ...,
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865]],

         [[ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          ...,
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865]],

         [[ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          ...,
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865],
          [ 0.1188,  0.0695,  0.9865]],

         ...,

         [[ 0.1188,  0.0695,  0.9865],
          [ 0.

AssertionError: 

In [None]:
!nvidia-smi

Mon Mar 27 01:21:25 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 466.27       Driver Version: 466.27       CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:04:00.0  On |                  N/A |
| 41%   36C    P8     7W / 125W |    825MiB /  6144MiB |     14%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces