# NeRF Quick Start Notebook

Plank-ing Hyundong 3D Reconstruction Project
Created 2022.07.05 <br>

This notebook has 4 steps.<br>
1. Sampling images from video
2. Get camera poses with COLMAP
3. Run NeRF
4. Get mesh file


__If you clone this repository to your local PC, set the directory paths first.__

__You must use the GPU runtime.__

## 📚 Directory Structure

- PlankHyundong

    - __nerf_quick_start.ipynb__

    - notebook
        - nerf_colab.ipynb
        - nerf_wandb_colab.ipynb
        - colmap_colab.ipynb
        - extract_mesh_colab.ipynb
        - sampling_colab.ipynb

    - data
        - video
            - video.MOV
        - (images)
            - ..
        - (logs)
            - ..
            

--------

__We use TensorFlow 1.x.__

In [None]:
# Select TensorFlow 1.x through the %tensorflow_version line magic.
try:
    %tensorflow_version 1.x
except ValueError:
    # Fallback for when the %tensorflow_version 1.x magic command does not work:
    # reinstall TensorFlow pinned to 1.15 and print the version that was imported.
    !pip uninstall --yes tensorflow
    !pip install tensorflow==1.15
    import tensorflow
    print(tensorflow.__version__)

### Setting directory path

In [None]:
# Root of the data folder.
path = '/content/data'

# Locations underneath the data folder:
#   video_path -> input video that frames are sampled from
#   img_path   -> folder receiving the sampled frames
#   logs_path  -> folder receiving config.txt and other outputs
video_path, img_path, logs_path = (
    '/content/data/video/video.MOV',
    '/content/data/images',
    '/content/data/logs',
)

## Step 1
### Video Sampling

- load data folder from PlankHyundong repository.

In [None]:
!git init
!git remote add origin https://github.com/ProtossDragoon/PlankHyundong
!git config core.sparsecheckout true
!echo data/* >> ./.git/info/sparse-checkout
!git pull origin main
!rm -rf ./git

import os

os.mkdir(img_path)
os.mkdir(logs_path)

- Sampling

In [None]:
import cv2

# Number of frames to extract from the video.
frame = 50

vidcap = cv2.VideoCapture(video_path)

cnt, num = 0, 1  # cnt -> index of the input frame, num -> number of the next output image

total_length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
# Keep every `cycle`-th frame. Clamp to 1 so a video with fewer than `frame`
# frames (or an unreported frame count) does not cause a modulo-by-zero.
cycle = max(1, total_length // frame)

if not vidcap.isOpened():
    print(f"could not open video: {video_path}")

while vidcap.isOpened() and num <= frame:
    ret, image = vidcap.read()
    if not ret:
        # No more frames can be read; without this break the loop would spin
        # forever whenever fewer than `frame` images could be written.
        break

    if cnt % cycle == 0:
        try:
            # imwrite reports most failures through its return value.
            written = cv2.imwrite(f"{img_path}/image{num}.jpg", image)
        except cv2.error:
            written = False

        if written:
            num += 1
        else:
            print("fail")

    cnt += 1

vidcap.release()

## 

## Step 2
### Run COLMAP to get camera pose

- Install dependent packages

In [None]:

!sudo apt-get install \
    git \
    cmake \
    build-essential \
    libboost-program-options-dev \
    libboost-filesystem-dev \
    libboost-graph-dev \
    libboost-regex-dev \
    libboost-system-dev \
    libboost-test-dev \
    libeigen3-dev \
    libsuitesparse-dev \
    libfreeimage-dev \
    libgoogle-glog-dev \
    libgflags-dev \
    libglew-dev \
    qtbase5-dev \
    libqt5opengl5-dev \
    libcgal-dev \
    libcgal-qt5-dev

- Install Ceres-solver
<br><br>This takes 10–20 minutes.

In [None]:
!sudo apt-get install libatlas-base-dev libsuitesparse-dev
!git clone https://ceres-solver.googlesource.com/ceres-solver
%cd ceres-solver
!git checkout $(git describe --tags)
%mkdir build
%cd build
!cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF
!make
!sudo make install

- Install COLMAP
<br><br>
It takes 10 ~ 20 minutes

In [None]:
!sudo apt-get install libmetis-dev # https://github.com/colmap/colmap/issues/1469

In [None]:
# Build and install COLMAP from source.
# The clone lands in whatever directory the previous cell left as the working
# directory, because the %cd magics persist across cells.
!git clone https://github.com/colmap/colmap
%cd colmap
# Check out the tag reported by `git describe --tags`.
!git checkout $(git describe --tags)
%mkdir build
%cd build
!cmake ..
!make
!sudo make install
# NOTE(review): this re-runs the CMake configuration with gcc-6/g++-6 only
# after `make install` has finished, so it cannot affect the binaries that
# were installed above -- confirm whether it is a leftover that can be removed.
!CC=/usr/bin/gcc-6 CXX=/usr/bin/g++-6 cmake ..

- Run LLFF

In [None]:
# Return to /content (the earlier build cells changed the working directory)
# and fetch the LLFF repository there.
%cd /content
!git clone https://github.com/Fyusion/LLFF

In [None]:
# Run LLFF's imgs2poses.py from inside the LLFF checkout.
%cd /content/LLFF

# {path} is interpolated by IPython with the data folder set in the path cell.
!python imgs2poses.py {path}

## Step 3
### Run NeRF

- Install dependencies

In [None]:
# System and Python packages installed before running NeRF.
# The -qq / -qqq flags reduce the amount of installer output.
!apt-get update
!sudo apt -qq install imagemagick
!pip install ConfigArgParse -qqq
!pip install imageio-ffmpeg -qqq

- clone NeRF source code

In [None]:
%cd ..
!git clone https://github.com/yyongjae/nerf.git

In [None]:
# Enter the cloned nerf directory (relative to the current working directory)
# and list its contents.
%cd nerf
!ls -al

- Set the hyperparameters

In [None]:
from datetime import datetime

# Timestamp that makes every experiment name unique.
now = datetime.now().strftime('%y%m%d_%H%M%S')

dataset_name = f'hyundong360_{frame}'
downsample_factor = 64 #@param {type:"slider", min:1, max:64, step:1}
netdepth = 4 #@param {type:"slider", min:4, max:16, step:2}
netwidth = 64 #@param {type:"slider", min:64, max:256, step:4}
experiment_name = f'{dataset_name}_{downsample_factor}_downsampled_{now}'
max_iter = 30000 #@param
learning_rate = 0.01 #@param
video_saving_cnt = 3 #@param {type:"slider", min:1, max:10, step:1}
n_samples = 32 #@param {type:"slider", min:32, max:256, step:32}

# The fine model uses twice as many samples as the coarse model,
# as proposed in the official paper (e.g. 64 -> 128).
n_importance = n_samples * 2

# Fixed random seed for reproducibility.
random_seed = 777 #@param

# tradeoff: memory <-> speed (does not affect training speed or quality;
# lower it sufficiently if an OOM occurs while a video is rendered during training)
rendering_speed = 2048 #@param {type:"slider", min:1024, max:16384, step:1024}

# tradeoff: memory <-> result
n_points_per_ray = 65536 #@param {type:"slider", min:2048, max:262144, step:1024}


print(f'experiment: {experiment_name}')


def make_config(a, b):
  """Write one entry to the open config file `f`.

  Writes ``a = b`` when a value is given, or the bare flag name ``a`` when
  `b` is None. Relies on the module-level file handle `f` opened below.
  """
  if b is None:
    f.write(a + '\n')
  else:
    f.write(a + ' = ' + b + '\n')


# Make config.txt file. The `with` block closes the file even if a write fails.
with open(f"{logs_path}/config.txt", 'w') as f:
  make_config("maxiter", str(max_iter))
  # Use the data root, matching the `--datadir {path}` of the training command
  # (the original wrote img_path here, which disagreed with that command).
  make_config("datadir", path)
  make_config("basedir", logs_path)
  make_config("dataset_type", 'llff')
  make_config('factor', str(downsample_factor))
  make_config('netdepth', str(netdepth))
  make_config('netwidth', str(netwidth))
  make_config('netdepth_fine', str(netdepth))
  make_config('netwidth_fine', str(netwidth))
  make_config('chunk', str(rendering_speed))
  make_config('netchunk', str(n_points_per_ray))
  make_config('lrate', str(learning_rate))
  make_config('i_video', str(max_iter // video_saving_cnt))
  make_config('expname', experiment_name)
  make_config('N_samples', str(n_samples))
  make_config('N_importance', str(n_importance))
  make_config('random_seed', str(random_seed))
  make_config('raw_noise_std', '1.0')
  make_config('use_viewdirs', None)
  make_config('no_ndc', None)
  make_config('spherify', None)
  make_config('lindisp', None)

In [None]:
# Train NeRF by running run_nerf.py with the hyperparameters defined in the
# previous cell; the {...} expressions are interpolated by IPython.
# NOTE(review): max_iter is only passed indirectly through --i_video here,
# while config.txt also records a `maxiter` entry -- confirm whether the
# training script is meant to receive the iteration limit as well.
!python run_nerf.py \
    --datadir {path} \
    --basedir {logs_path} \
    --dataset_type llff \
    --factor {downsample_factor} \
    --netdepth {netdepth} \
    --netwidth {netwidth} \
    --netdepth_fine {netdepth} \
    --netwidth_fine {netwidth} \
    --chunk {rendering_speed} \
    --netchunk {n_points_per_ray} \
    --lrate {learning_rate} \
    --i_video {max_iter // video_saving_cnt} \
    --expname {experiment_name} \
    --N_samples {n_samples} \
    --N_importance {n_importance} \
    --random_seed {random_seed} \
    --raw_noise_std 1.0 \
    --use_viewdirs \
    --no_ndc \
    --spherify \
    --lindisp

## Step 4
### Get Mesh file

- Install dependencies

In [None]:
!pip install imageio
!pip install imageio-ffmpeg
!pip install configargparse
!apt install imagemagick
!pip install PyMCubes
!pip install trimesh
!pip install pyrender
!sudo apt -qq install imagemagick
!pip install ConfigArgParse -qqq
!pip install imageio-ffmpeg -qqq

- clone official code

In [None]:
!git clone https://github.com/bmild/nerf.git

%cd nerf
!ls -al

import run_nerf
import run_nerf_helpers

import os
import numpy as np
import pprint
import imageio
import matplotlib.pyplot as plt

In [None]:
# Experiment name and config file produced by the hyperparameter cell.
expname = experiment_name
config = f'{logs_path}/config.txt'

print('Args:')
# Read through a context manager so the file handle is closed again.
with open(config, 'r') as config_file:
    print(config_file.read())

In [None]:
# Build the argument namespace from config.txt plus the path of the trained
# weights to load.
parser = run_nerf.config_parser()
# Look for the checkpoint inside the experiment's own sub-directory; the
# original skipped `expname` and pointed at logs_path directly.
# Assumes checkpoints are saved under <logs_path>/<expname>/ as in the
# upstream NeRF mesh-extraction notebook -- TODO confirm for the fork that
# was used for training.
ft_str = '--ft_path {}'.format(os.path.join(logs_path, expname, 'model_030000.npy'))
args = parser.parse_args('--config {} '.format(config) + ft_str)
print(args)

# create nerf model
_, render_kwargs_test, start, grad_vars, models = run_nerf.create_nerf(args)

# Near / far bounds handed to the renderer together with the other kwargs.
bds_dict = {
    'near' : tf.cast(2., tf.float32),
    'far' : tf.cast(6., tf.float32),
}
render_kwargs_test.update(bds_dict)

print('Render kwargs:')
pprint.pprint(render_kwargs_test)

net_fn = render_kwargs_test['network_query_fn']
print(net_fn)

# Render one low-resolution test view as a sanity check of the loaded model.
c2w = np.eye(4)[:3,:4].astype(np.float32) # identity pose matrix
c2w[2,-1] = 4.
H, W, focal = 800, 800, 1200.
down = 8  # render at 1/8 of H, W and focal
test = run_nerf.render(H//down, W//down, focal/down, c2w=c2w, **render_kwargs_test)
img = np.clip(test[0],0,1)
plt.imshow(img)
plt.show()

- Query network on dense 3d grid of points

In [None]:
# Build an (N+1)^3 grid of 3D points covering [-1.2, 1.2] on every axis and
# flatten it to a list of points for the network query.
N = 256
t = np.linspace(-1.2, 1.2, N+1)

query_pts = np.stack(np.meshgrid(t, t, t), -1).astype(np.float32)
print(query_pts.shape)
sh = query_pts.shape
flat = query_pts.reshape([-1,3])


def batchify(fn, chunk):
    """Wrap `fn` so that it is applied to `inputs` in slices of `chunk` rows.

    Returns `fn` unchanged when `chunk` is None; otherwise returns a function
    that concatenates the per-slice results along axis 0.
    NOTE(review): not called anywhere in the visible cells.
    """
    if chunk is None:
        return fn
    def ret(inputs):
        return tf.concat([fn(inputs[i:i+chunk]) for i in range(0, inputs.shape[0], chunk)], 0)
    return ret


# Query the fine network for the points flat[i0:i1], with all-zero view directions.
fn = lambda i0, i1 : net_fn(flat[i0:i1,None,:], viewdirs=np.zeros_like(flat[i0:i1]), network_fn=render_kwargs_test['network_fine'])
# Evaluate the grid in slices of `chunk` points and stitch the results together.
chunk = 1024*64
raw = np.concatenate([fn(i, i+chunk).numpy() for i in range(0, flat.shape[0], chunk)], 0)
raw = np.reshape(raw, list(sh[:-1]) + [-1])
# Last channel of the network output, clamped to be non-negative; this is the
# volume passed to marching cubes in the next cell.
sigma = np.maximum(raw[...,-1], 0.)

print(raw.shape)
plt.hist(np.maximum(0,sigma.ravel()), log=True)
plt.show()

- Marching cubes with PyMCubes

In [None]:
import mcubes

# Iso-level at which the surface is extracted from the sigma volume.
threshold = 50.
print('fraction occupied', np.mean(sigma > threshold))
vertices, triangles = mcubes.marching_cubes(sigma, threshold)
print('done', vertices.shape, triangles.shape)

# f-string so {logs_path} is substituted; the original plain string pointed
# at a literal "{logs_path}" directory.
mcubes.export_obj(vertices, triangles, f'{logs_path}/extract_mesh.obj')

- Save out video with pyrender

In [None]:
import trimesh

# Build a mesh from the marching-cubes output. Vertex coordinates (grid
# indices) are divided by N and shifted by -0.5 before being handed to trimesh.
mesh = trimesh.Trimesh(vertices / N - .5, triangles)
mesh.show()

In [None]:
# Set the PyOpenGL platform to EGL before pyrender is imported.
os.environ["PYOPENGL_PLATFORM"] = "egl"
import pyrender
from load_blender import pose_spherical

scene = pyrender.Scene()
scene.add(pyrender.Mesh.from_trimesh(mesh, smooth=False))

# Set up the camera -- z-axis away from the scene, x-axis right, y-axis up
camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0)

# Camera pose produced by pose_spherical; the node `nc` is kept so that a
# later cell can move the camera.
camera_pose = pose_spherical(-20., -40., 1.).numpy()
nc = pyrender.Node(camera=camera, matrix=camera_pose)
scene.add_node(nc)

# Set up the light -- a point light in the same spot as the camera
light = pyrender.PointLight(color=np.ones(3), intensity=4.0)
nl = pyrender.Node(light=light, matrix=camera_pose)
scene.add_node(nl)

# Render the scene
r = pyrender.OffscreenRenderer(640, 480)
color, depth = r.render(scene)

# Show the colour image and the depth map of the rendered view.
plt.imshow(color)
plt.show()
plt.imshow(depth)
plt.show()

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Render 120 views while the camera pose steps through 0..360 (end excluded).
imgs = []
for th in np.linspace(0, 360., 120+1)[:-1]:
    camera_pose = pose_spherical(th, -40., 1.).numpy()
    scene.set_pose(nc, pose=camera_pose)
    imgs.append(r.render(scene)[0])
# f-string so {logs_path} is substituted; the original plain string pointed
# at a literal "{logs_path}" directory.
f = f'{logs_path}/mesh_turntable.mp4'
imageio.mimwrite(f, imgs, fps=30)
print('done')

# Embed the video in the notebook as a base64 data URL; the context manager
# closes the file handle after reading.
with open(f, 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls autoplay loop>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)