# Profile NeRF with Timeloop and Accelergy

In [1]:
# Add parent dir to path so we can import accelerating_nerfs
import sys
sys.path.append("../")

In [2]:
import json
import numpy as np
import os
import pandas as pd
import re
import yaml
import traceback

from collections import defaultdict
from accelerating_nerfs.models import VanillaNeRF, patch_forward

# Custom code
from analysis import *
from profiler import Profiler
from notebook_utils import natural_sort

  warn(f"Failed to load image Python extension: {e}")
  device: torch.device = torch.device("cpu"),


## Load NeRF model
We use vanilla NeRFs which are MLPs. Uncomment the cell below to view the architecture.

In [3]:
# Uncomment to view architecture diagram
# from IPython.display import IFrame
# IFrame("./figures/netdiag-modified.pdf", width=600, height=325)

In [4]:
model = VanillaNeRF()

# We need to patch the forward method for the purpose of mapping to pass in ray directions
# This ensures the bottleneck layer is captured by the converter from pytorch2timeloop 
patch_forward(model)
print(model)

VanillaNeRF(
  (posi_encoder): SinusoidalEncoder()
  (view_encoder): SinusoidalEncoder()
  (mlp): NerfMLP(
    (base): MLP(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList(
        (0): Linear(in_features=63, out_features=256, bias=True)
        (1): Linear(in_features=256, out_features=256, bias=True)
        (2): Linear(in_features=256, out_features=256, bias=True)
        (3): Linear(in_features=256, out_features=256, bias=True)
        (4): Linear(in_features=256, out_features=256, bias=True)
        (5): Linear(in_features=319, out_features=256, bias=True)
        (6): Linear(in_features=256, out_features=256, bias=True)
        (7): Linear(in_features=256, out_features=256, bias=True)
      )
    )
    (sigma_layer): DenseLayer(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList()
      (output_layer): Linear(in_features=256, out_features=1, bias=True)
    )
    (bottl



## Convert to Timeloop and load sparsity into the layers

In [5]:
# TODO: play around with the batch size
batch_size = 128

# The unknown module type warnings are ok
# We create a copy for nerf-sparse so we can copy the layer sparsities over to the configurations
_ = convert_nerf_to_timeloop(model, batch_size=batch_size, sub_dir="nerf")
_ = convert_nerf_to_timeloop(model, batch_size=batch_size, sub_dir="nerf-sparse")
nerf_layer_shapes = load_nerf_layer_shapes()

unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'accelerating_nerfs.models.NerfMLP'>
unknown module type <class 'accelerating_nerfs.models.VanillaNeRF'>
unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unk

Converted VanillaNeRF model to Timeloop problems in workloads/nerf
Converted VanillaNeRF model to Timeloop problems in workloads/nerf-sparse


In [6]:
!ls ../accelerating_nerfs/sparsity/

chair_sparsity.json  hotdog_sparsity.json     mic_sparsity.json
drums_sparsity.json  lego_sparsity.json       ship_sparsity.json
ficus_sparsity.json  materials_sparsity.json


In [7]:
# Load layer sparsity results
sparsities = load_nerf_sparsities("../accelerating_nerfs/sparsity")
layer_to_avg_sparsity = compute_layer_sparsities(sparsities)
print("Layer to Average Sparsity:", json.dumps(layer_to_avg_sparsity, indent=4))

Loaded sparsity results for dict_keys(['chair', 'drums', 'ficus', 'hotdog', 'lego', 'materials', 'mic', 'ship'])
Layer to Average Sparsity: {
    "1": {
        "input_sparsity": {
            "mean": 4.3946616992536214e-08,
            "std": 4.918707813129488e-07,
            "num": 27708
        },
        "output_sparsity": {
            "mean": 0.5542258571408987,
            "std": 0.03391004992789977,
            "num": 27708
        }
    },
    "2": {
        "input_sparsity": {
            "mean": 0.5542258571408987,
            "std": 0.03391004992789977,
            "num": 27708
        },
        "output_sparsity": {
            "mean": 0.6947238747829664,
            "std": 0.03819135012459017,
            "num": 27708
        }
    },
    "3": {
        "input_sparsity": {
            "mean": 0.6947238747829664,
            "std": 0.03819135012459017,
            "num": 27708
        },
        "output_sparsity": {
            "mean": 0.7004855604878076,
            "std

### Load the sparsity results into the Timeloop layers

In [8]:
add_sparsity_to_nerf_layers(layer_to_avg_sparsity, layer_dir="workloads/nerf-sparse")

Layer 1 added densities: {'Inputs': 0.999999956053383, 'Weights': 1.0, 'Outputs': 0.44577414285910133}
Layer 2 added densities: {'Inputs': 0.44577414285910133, 'Weights': 1.0, 'Outputs': 0.3052761252170336}
Layer 3 added densities: {'Inputs': 0.3052761252170336, 'Weights': 1.0, 'Outputs': 0.29951443951219237}
Layer 4 added densities: {'Inputs': 0.29951443951219237, 'Weights': 1.0, 'Outputs': 0.3106220461071759}
Layer 5 added densities: {'Inputs': 0.3106220461071759, 'Weights': 1.0, 'Outputs': 0.3248678330742185}
Layer 6 added densities: {'Inputs': 0.458201136358505, 'Weights': 1.0, 'Outputs': 0.3499051030453799}
Layer 7 added densities: {'Inputs': 0.3499051030453799, 'Weights': 1.0, 'Outputs': 0.3061052845077121}
Layer 8 added densities: {'Inputs': 0.3061052845077121, 'Weights': 1.0, 'Outputs': 0.30124270859846225}
Layer 9 added densities: {'Inputs': 0.30124270859846225, 'Weights': 1.0, 'Outputs': 0.3477407934631964}
Layer 10 added densities: {'Inputs': 0.3129442400712461, 'Weights': 1

### Configure saving of profiling results
This isn't important so you can ignore the details.

In [9]:
# Accumulate results in this dictionary
profile_results = {}

# Setup saving the profiling results
results_dir = "profile_results"
os.makedirs(results_dir, exist_ok=True)


def save_results():
    all_other_results = {}
    
    for arch, arch_results in profile_results.items():
        # Write the super long results to it's own file
        arch_results_path = os.path.join(results_dir, f"{arch}_results.json")
        with open(arch_results_path, "w") as f:
            json.dump(arch_results["results"], f, indent=4)
        
        # Accumulate the other results as they're shorter and more readable
        other_results = {
            k: v for k, v in arch_results.items()
            if k != "results"
        }
        # Have a pointer to the separate results file
        other_results["results"] = os.path.abspath(arch_results_path)
        all_other_results[arch] = other_results
    
    results_path = os.path.join(results_dir, "results.json")
    with open(results_path, "w") as f:
        json.dump(all_other_results, f, indent=4)

    print(f"Saved profile results to {results_path}")

## Profile using Timeloop and Accelergy
I think we can safely ignore the 'No handlers found'

To rerun things, delete the existing results in the `profiled_libs/` directory.

In [10]:
# Don't use simba_like or simple_output_stationary as the mapper constraints are too stringent
archs_and_sparse = [
    ("eyeriss_like", False),
    ("simple_weight_stationary", False),
    ("eyeriss_like_onchip_compression", True)
]
failed_archs = set()

for (arch, is_sparse) in archs_and_sparse:
    msg = f"Running {arch}"
    print(len(msg) * '=')
    print(msg)
    print(len(msg) * '=')
    
    # Profile - you shouldn't need to change anything here
    try:
        profiler = Profiler(
            top_dir='workloads',
            sub_dir='nerf' if not is_sparse else 'nerf-sparse',
            timeloop_dir=f"designs/{arch}",
            arch_name=arch,
            model=model,
            input_size=(1, 3),
        )
        results, summary, layer_summary = profiler.profile()
    except Exception as e:
        traceback.print_exc()
        print(f"ERROR: could not run profiler for {arch}, do not trust these results!")
        failed_archs.add(arch)
        continue
    
    # Add nerf layer shapes to the layer summary
    for layer_id in layer_summary:
        layer_summary[layer_id].update(nerf_layer_shapes[layer_id])
        
    # Print summary information
    for k, v in summary.items():
        print(f"{k}: {v}")
        
    profile_results[arch] = {
        "results": results,
        "summary": summary,
        "layer_summary": layer_summary,
    }
    save_results()

Running eyeriss_like
Loaded profiled lib from ./profiled_libs/eyeriss_like_profiled_lib.json


running timeloop to get energy and latency...: 0it [00:00, ?it/s]

Saved profiled lib to ./profiled_libs/eyeriss_like_profiled_lib.json





total_area: 0.0
total_energy: 1211.45
total_cycle: 1136384.0
num_params: 595844
macs: 593450
activation_size: 2300.0
Saved profile results to profile_results/results.json
Running simple_weight_stationary


running timeloop to get energy and latency...: 100%|██████████| 6/6 [07:57<00:00, 79.64s/it] 
Traceback (most recent call last):
  File "/tmp/ipykernel_269/3037256322.py", line 25, in <module>
    results, summary, layer_summary = profiler.profile()
  File "/home/workspace/notebooks/profiler.py", line 348, in profile
    self.populate_profiled_lib(layer_info)
  File "/home/workspace/notebooks/profiler.py", line 297, in populate_profiled_lib
    info = {key: layer_info[layer_id][key] for key in keys_to_include}
  File "/home/workspace/notebooks/profiler.py", line 297, in <dictcomp>
    info = {key: layer_info[layer_id][key] for key in keys_to_include}
KeyError: 'energy'


ERROR: could not run profiler for simple_weight_stationary, do not trust these results!
Running eyeriss_like_onchip_compression
Loaded profiled lib from ./profiled_libs/eyeriss_like_onchip_compression_profiled_lib.json
Sparse optimization is enabled for layer 1
Sparse optimization is enabled for layer 2
Sparse optimization is enabled for layer 3
Sparse optimization is enabled for layer 4
Sparse optimization is enabled for layer 5
Sparse optimization is enabled for layer 6
Sparse optimization is enabled for layer 7
Sparse optimization is enabled for layer 8
Sparse optimization is enabled for layer 9
Sparse optimization is enabled for layer 10
Sparse optimization is enabled for layer 11
Sparse optimization is enabled for layer 12


running timeloop to get energy and latency...: 100%|██████████| 12/12 [02:47<00:00, 13.95s/it]


Saved profiled lib to ./profiled_libs/eyeriss_like_onchip_compression_profiled_lib.json
total_area: 0.0
total_energy: 804.7200000000001
total_cycle: 1136384.0
num_params: 595844
macs: 593450
activation_size: 2300.0
Saved profile results to profile_results/results.json




## Analyze the results

In [11]:
# Massage results into dataframes
layer_dfs = {}
all_summary = {}

for arch, arch_results in profile_results.items():
    all_summary[arch] = arch_results["summary"]

    # Load layer results into dataframe
    df_layer = pd.DataFrame.from_dict(arch_results["layer_summary"], orient="index")
    df_layer = df_layer.drop('name', axis=1)
    df_layer.index.name = "layer_id"
    layer_dfs[arch] = df_layer
    
df_summary = pd.DataFrame.from_dict(all_summary, orient="index")

### Overall Summary

In [12]:
df_summary

Unnamed: 0,total_area,total_energy,total_cycle,num_params,macs,activation_size
eyeriss_like,0.0,1211.45,1136384.0,595844,593450,2300.0
eyeriss_like_onchip_compression,0.0,804.72,1136384.0,595844,593450,2300.0


In [13]:
layer_dfs["eyeriss_like"]

Unnamed: 0_level_0,total_area,total_energy,total_cycle,num,energy,area,cycle,gflops,utilization,edp,shape
layer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.0,21.38,21504,1,21.38,0.0,21504,190.48,0.57,0.46,"{'C': 63, 'M': 256, 'N': 128}"
2,0.0,442.54,917504,7,63.22,0.0,131072,127.75,0.38,8.29,"{'C': 256, 'M': 256, 'N': 128}"
6,0.0,107.75,118784,1,107.75,0.0,118784,175.72,0.52,12.8,"{'C': 319, 'M': 256, 'N': 128}"
9,0.0,5.64,4096,1,5.64,0.0,4096,15.97,0.05,0.0231,"{'C': 256, 'M': 1, 'N': 128}"
11,0.0,631.07,72448,1,631.07,0.0,72448,127.77,0.38,45.7,"{'C': 283, 'M': 128, 'N': 128}"
12,0.0,3.07,2048,1,3.07,0.0,2048,47.81,0.14,0.00629,"{'C': 128, 'M': 3, 'N': 128}"


In [14]:
layer_dfs["simple_weight_stationary"]

KeyError: 'simple_weight_stationary'

In [15]:
layer_dfs["eyeriss_like_onchip_compression"]

Unnamed: 0_level_0,total_area,total_energy,total_cycle,num,energy,area,cycle,gflops,utilization,edp,shape
layer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.0,22.1,21504,1,22.1,0.0,21504,190.48,0.57,0.475,"{'C': 63, 'M': 256, 'N': 128}"
2,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
3,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
4,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
5,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
6,0.0,104.51,118784,1,104.51,0.0,118784,175.72,0.52,12.4,"{'C': 319, 'M': 256, 'N': 128}"
7,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
8,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
9,0.0,5.13,4096,1,5.13,0.0,4096,15.97,0.05,0.021,"{'C': 256, 'M': 1, 'N': 128}"
10,0.0,80.59,131072,1,80.59,0.0,131072,127.75,0.38,10.6,"{'C': 256, 'M': 256, 'N': 128}"
