# Profile NeRF with Timeloop and Accelergy

In [1]:
# Add the parent directory to the import path so that `accelerating_nerfs` can be imported
import sys

# Guard the append so that re-running this cell does not pile up duplicate path entries
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
import json
import numpy as np
import os
import re
import yaml
import traceback

from collections import defaultdict
from accelerating_nerfs.models import VanillaNeRF, patch_forward

# Custom code
from analysis import *
from profiler import Profiler
from notebook_utils import natural_sort

## Load NeRF model
We use vanilla NeRFs which are MLPs. Uncomment the cell below to view the architecture.

In [3]:
# Uncomment to view architecture diagram
# from IPython.display import IFrame
# IFrame("./figures/netdiag-modified.pdf", width=600, height=325)

In [4]:
# Instantiate the NeRF model with its default constructor arguments
model = VanillaNeRF()

# We need to patch the forward method, for the purpose of mapping, so that ray directions are passed in.
# This ensures the bottleneck layer is captured by the converter from pytorch2timeloop.
patch_forward(model)
# Print the module tree so the layer structure can be inspected in the output below
print(model)

VanillaNeRF(
  (posi_encoder): SinusoidalEncoder()
  (view_encoder): SinusoidalEncoder()
  (mlp): NerfMLP(
    (base): MLP(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList(
        (0): Linear(in_features=63, out_features=256, bias=True)
        (1): Linear(in_features=256, out_features=256, bias=True)
        (2): Linear(in_features=256, out_features=256, bias=True)
        (3): Linear(in_features=256, out_features=256, bias=True)
        (4): Linear(in_features=256, out_features=256, bias=True)
        (5): Linear(in_features=319, out_features=256, bias=True)
        (6): Linear(in_features=256, out_features=256, bias=True)
        (7): Linear(in_features=256, out_features=256, bias=True)
      )
    )
    (sigma_layer): DenseLayer(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList()
      (output_layer): Linear(in_features=256, out_features=1, bias=True)
    )
    (bottl



## Convert to Timeloop and load sparsity into the layers

In [5]:
# TODO: play around with the batch size
# NOTE(review): batch_size=128 is also hard-coded in the Profiler call further down;
# presumably the two values should match — confirm before changing only one of them.
# The "unknown module type" warnings printed by the conversion are ok.
# The return value is discarded; the printed message reports where the Timeloop problems were written.
_ = convert_nerf_to_timeloop(model, batch_size=128)
# Keep the layer shapes around; the profiling loop further down merges them into each layer summary
nerf_layer_shapes = load_nerf_layer_shapes()

unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'accelerating_nerfs.models.NerfMLP'>
unknown module type <class 'accelerating_nerfs.models.VanillaNeRF'>


Converted VanillaNeRF model to Timeloop problems in workload/nerf


In [6]:
# List the available sparsity result files; the next cell loads one of them by file name
!ls ../accelerating_nerfs/sparsity/

2023-05-03_00-00-30_sparsity.json  2023-05-03_18-13-41_volrend_sparsity.json
2023-05-03_00-21-28_sparsity.json


In [7]:
# Load the sparsity results from one of the files listed above
sparsities = load_nerf_sparsities("../accelerating_nerfs/sparsity/2023-05-03_00-21-28_sparsity.json")
# Reduce to one average sparsity value per layer (the printed result is keyed by layer number);
# presumably the average is taken over the loaded scenes — confirm in compute_layer_sparsities.
layer_to_avg_sparsity = compute_layer_sparsities(sparsities)
# Pretty-print the mapping as indented JSON
print("Layer to Average Sparsity:", json.dumps(layer_to_avg_sparsity, indent=4))

Loaded sparsity results for dict_keys(['chair', 'drums', 'ficus', 'hotdog', 'lego', 'materials', 'mic', 'ship'])
Layer to Average Sparsity: {
    "1": 4.346944171693548e-08,
    "2": 0.5232157788498581,
    "3": 0.6606506746276803,
    "4": 0.6719614964550145,
    "5": 0.635327070883858,
    "6": 0.49655377627334907,
    "7": 0.5577130213002062,
    "8": 0.6088893305723659,
    "9": 0.5989934303659363,
    "10": 0.5785213852133457,
    "11": 2.1946953467908288e-09,
    "12": 0.6979629018407192
}


### Load the sparsity results into the Timeloop layers

In [8]:
# Apply the per-layer average sparsities to the Timeloop layers.
# Per the printed output, Inputs and Outputs get density = 1 - sparsity while Weights stay at 1.0.
add_sparsity_to_nerf_layers(layer_to_avg_sparsity)

Layer 1 added densities: {'Inputs': 0.9999999565305583, 'Weights': 1.0, 'Outputs': 0.9999999565305583}
Layer 2 added densities: {'Inputs': 0.47678422115014185, 'Weights': 1.0, 'Outputs': 0.47678422115014185}
Layer 3 added densities: {'Inputs': 0.33934932537231965, 'Weights': 1.0, 'Outputs': 0.33934932537231965}
Layer 4 added densities: {'Inputs': 0.32803850354498554, 'Weights': 1.0, 'Outputs': 0.32803850354498554}
Layer 5 added densities: {'Inputs': 0.364672929116142, 'Weights': 1.0, 'Outputs': 0.364672929116142}
Layer 6 added densities: {'Inputs': 0.5034462237266509, 'Weights': 1.0, 'Outputs': 0.5034462237266509}
Layer 7 added densities: {'Inputs': 0.44228697869979383, 'Weights': 1.0, 'Outputs': 0.44228697869979383}
Layer 8 added densities: {'Inputs': 0.39111066942763406, 'Weights': 1.0, 'Outputs': 0.39111066942763406}
Layer 9 added densities: {'Inputs': 0.40100656963406367, 'Weights': 1.0, 'Outputs': 0.40100656963406367}
Layer 10 added densities: {'Inputs': 0.42147861478665427, 'Weig

In [9]:
# NOTE(review): this cell raises unconditionally, so "Restart & Run All" halts here and none of
# the cells below are executed. Presumably a deliberate stop before the expensive profiling run;
# remove or skip this cell to run the profiling — confirm the intent.
raise NotImplementedError

NotImplementedError: 

### Configure saving of profiling results
This isn't important so you can ignore the details.

In [None]:
# Directory that receives the profiling output files; created up front if it is missing
results_dir = "profile_results"
os.makedirs(results_dir, exist_ok=True)

# Profiling results of every architecture are collected in this dictionary
profile_results = dict()


def save_results(all_results=None, output_dir=None):
    """Write the accumulated profiling results to JSON files.

    For every architecture, the (very long) "results" entry is written to
    ``<output_dir>/<arch>_results.json``. Everything else is collected into a
    single ``<output_dir>/results.json`` index in which each architecture's
    "results" entry is replaced by the absolute path of its own file.

    Args:
        all_results: Mapping of architecture name to a dict that contains a
            "results" key plus any other JSON-serializable entries. Defaults
            to the notebook-level ``profile_results``.
        output_dir: Directory to write into; created if it does not exist.
            Defaults to the notebook-level ``results_dir``.

    Raises:
        KeyError: If an architecture entry has no "results" key.
    """
    # Fall back to the notebook globals so existing `save_results()` calls keep working
    if all_results is None:
        all_results = profile_results
    if output_dir is None:
        output_dir = results_dir

    # Do not rely on another cell having created the directory
    os.makedirs(output_dir, exist_ok=True)

    all_other_results = {}

    for arch, arch_results in all_results.items():
        # Write the super long results to its own file
        arch_results_path = os.path.join(output_dir, f"{arch}_results.json")
        with open(arch_results_path, "w") as f:
            json.dump(arch_results["results"], f, indent=4)
        print(f"Saved {arch} results to {arch_results_path}")

        # Accumulate the other results as they're shorter and more readable
        other_results = {
            k: v for k, v in arch_results.items()
            if k != "results"
        }
        # Have a pointer to the separate results file
        other_results["results"] = os.path.abspath(arch_results_path)
        all_other_results[arch] = other_results

    results_path = os.path.join(output_dir, "results.json")
    with open(results_path, "w") as f:
        json.dump(all_other_results, f, indent=4)

    print(f"Saved profile results to {results_path}")

### Loading NeRF layer shapes
You can ignore this; it populates the profiling results with additional debug information. The layer shapes are loaded earlier in the notebook with `load_nerf_layer_shapes()`.

### Add densities to NeRF layer problems
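Load the layer sparsities from our earlier analysis (`layer_to_avg_sparsity`) into the layer problems as densities. This is done earlier in the notebook with `add_sparsity_to_nerf_layers`.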

## Profile using Timeloop and Accelergy
I think we can safely ignore the 'unknown module type' warnings.

In [None]:
# Don't use simba_like or simple_output_stationary as the mapper constraints are too stringent
# archs = ["eyeriss_like", "simba_like_modified", "simple_output_stationary_modified", "simple_weight_stationary"]
# archs = ["simba_like_modified", "simple_output_stationary_modified"]
archs = ["eyeriss_like_onchip_compression"]
# Architectures whose profiling run raised; nothing is recorded for them in profile_results
failed_archs = set()

for arch in archs:
    print(20 * '=')
    print(f"Running {arch}")
    print(20 * '=')

    # Profile - you should only need to change batch_size if anything
    try:
        # NOTE(review): the conversion cell reported writing to "workload/nerf" while this uses
        # top_dir='workloads' — confirm that the two are meant to differ.
        profiler = Profiler(
            top_dir='workloads',
            sub_dir='nerf',
            timeloop_dir=f"designs/{arch}",
            arch_name=arch,
            model=model,
            input_size=(1, 3),
            batch_size=128,  # TODO: adjust this, ICARUS uses 128
            convert_fc=True,
            exception_module_names=[]
        )
        # Apply the measured per-layer sparsities again before profiling, using the same helper
        # and data as the earlier cell (presumably the Profiler rewrites the layer files — confirm).
        add_sparsity_to_nerf_layers(layer_to_avg_sparsity)
        results, summary, layer_summary = profiler.profile()
    except Exception:
        # TODO: figure this out https://piazza.com/class/ldf2iof72w51sl/post/44
        # Deliberately best-effort: report the failure, remember the arch, and move on to the next one
        traceback.print_exc()
        print(f"ERROR: could not run profiler for {arch}, do not trust these results!")
        failed_archs.add(arch)
        continue

    # Add nerf layer shapes to the layer summary.
    # nerf_layer_shapes holds the value returned by load_nerf_layer_shapes(), so it is indexed, not called.
    for layer_id in layer_summary:
        layer_summary[layer_id].update(nerf_layer_shapes[layer_id])

    # Print summary information
    for k, v in summary.items():
        print(f"{k}: {v}")

    profile_results[arch] = {
        "results": results,
        "summary": summary,
        "layer_summary": layer_summary,
    }
    # Save after every architecture so that finished runs survive a later failure
    save_results()

In [None]:
# Print the stored summary of every profiled architecture
for arch, arch_results in profile_results.items():
    print(f"===== {arch} =====")
    # Use this architecture's own summary rather than the `summary` variable left over
    # from the last iteration of the profiling loop
    print(arch_results["summary"])