# Profile NeRF with Timeloop and Accelergy

In [1]:
import json
import math
import os
import re
import sys

import yaml

from profiler import Profiler

sys.path.append("../") # go to parent dir

from accelerating_nerfs.models import VanillaNeRF

In [2]:
# Vanilla NeRF is built from MLPs
model = VanillaNeRF()


def new_forward(self, x):
    """Single-input forward: call the saved original forward with ``x`` for both arguments."""
    return self.old_forward(x, x)


# The forward method is patched for the purpose of mapping so that ray directions
# are passed in; this ensures the bottleneck layer is captured in the timeloop outputs.
# Keep a handle on the original forward first, then bind the wrapper to this instance.
model.old_forward = model.forward
model.forward = new_forward.__get__(model)
print(model)

VanillaNeRF(
  (posi_encoder): SinusoidalEncoder()
  (view_encoder): SinusoidalEncoder()
  (mlp): NerfMLP(
    (base): MLP(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList(
        (0): Linear(in_features=63, out_features=256, bias=True)
        (1): Linear(in_features=256, out_features=256, bias=True)
        (2): Linear(in_features=256, out_features=256, bias=True)
        (3): Linear(in_features=256, out_features=256, bias=True)
        (4): Linear(in_features=256, out_features=256, bias=True)
        (5): Linear(in_features=319, out_features=256, bias=True)
        (6): Linear(in_features=256, out_features=256, bias=True)
        (7): Linear(in_features=256, out_features=256, bias=True)
      )
    )
    (sigma_layer): DenseLayer(
      (hidden_activation): ReLU()
      (output_activation): Identity()
      (hidden_layers): ModuleList()
      (output_layer): Linear(in_features=256, out_features=1, bias=True)
    )
    (bottl

In [7]:
# Architectures to profile; the names after the `#` are currently disabled.
archs = ["eyeriss_like"] #, "simba_like", "simple_output_stationary", "simple_weight_stationary"]
# Maps architecture name -> {"results", "summary", "layer_results"} for that run.
arch_results = {}

for arch in archs:
    print(20 * '=')
    print(f"Running {arch}")
    print(20 * '=')

    # Profile
    profiler = Profiler(
        top_dir='workloads',
        sub_dir='nerf',
        timeloop_dir=arch,
        arch_name=arch,
        model=model,
        input_size=(1, 3),
        batch_size=128,  # TODO: adjust this, ICARUS uses 128
        convert_fc=True,
        exception_module_names=[]
    )
    results, summary = profiler.profile()

    # Print some results for debug purposes
    total_energy = 0
    total_cycle = 0
    layer_results = {}
    for layer_id, info in results.items():
        print(f"ID: {layer_id} \t Energy: {info['energy']} \t Cycle: {info['cycle']} \t Number of same architecture layers: {info['num']}")
        # Each entry is reported once but counts for `num` layers, so scale
        # the per-layer figures before accumulating the totals.
        layer_energy = info["energy"] * info["num"]
        layer_cycle = info["cycle"] * info["num"]

        total_energy += layer_energy
        total_cycle += layer_cycle
        layer_results[layer_id] = {
            "name": info["name"],
            "total_energy": layer_energy,
            "total_cycle": layer_cycle,
            "num": info["num"],
            "energy": info["energy"],
            "cycle": info["cycle"]
        }

    # Our results should match up with summary.
    # Energy is an accumulated float, so compare with a tolerance instead of
    # exact equality; the cycle counts shown in the output are integers and are
    # compared exactly.
    assert math.isclose(total_energy, summary["total_energy"], rel_tol=1e-6), (total_energy, summary["total_energy"])
    assert total_cycle == summary["total_cycle"], (total_cycle, summary["total_cycle"])
    print("Summary:", json.dumps(summary, indent=4))

    arch_results[arch] = {
        "results": results,
        "summary": summary,
        "layer_results": layer_results
    }

unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'accelerating_nerfs.models.SinusoidalEncoder'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.DenseLayer'>
unknown module type <class 'torch.nn.modules.linear.Identity'>
unknown module type <class 'accelerating_nerfs.models.MLP'>
unknown module type <class 'accelerating_nerfs.models.NerfMLP'>
unknown module type <class 'accelerating_nerfs.models.VanillaNeRF'>


Running eyeriss_like


running timeloop to get energy and latency...: 0it [00:00, ?it/s]

ID: 1 	 Energy: 21.38 	 Cycle: 21504 	 Number of same architecture layers: 1
ID: 2 	 Energy: 63.22 	 Cycle: 131072 	 Number of same architecture layers: 7
ID: 6 	 Energy: 107.75 	 Cycle: 118784 	 Number of same architecture layers: 1
ID: 9 	 Energy: 5.64 	 Cycle: 4096 	 Number of same architecture layers: 1
ID: 11 	 Energy: 631.07 	 Cycle: 72448 	 Number of same architecture layers: 1
ID: 12 	 Energy: 3.07 	 Cycle: 2048 	 Number of same architecture layers: 1
Summary: {
    "total_energy": 1211.45,
    "total_cycle": 1136384,
    "num_params": 595844,
    "macs": 593450,
    "activation_size": 2300.0
}





In [8]:
# Look the raw results up by architecture rather than relying on the `results`
# variable left over from the last iteration of the profiling loop.
arch_results['eyeriss_like']['results']

{1: {'layer_dict': {'problem': {'instance': {'C': 63,
     'Hdilation': 1,
     'Hstride': 1,
     'M': 256,
     'N': 128,
     'P': 1,
     'Q': 1,
     'R': 1,
     'S': 1,
     'Wdilation': 1,
     'Wstride': 1},
    'shape': {'coefficients': [{'default': 1, 'name': 'Wstride'},
      {'default': 1, 'name': 'Hstride'},
      {'default': 1, 'name': 'Wdilation'},
      {'default': 1, 'name': 'Hdilation'}],
     'data-spaces': [{'name': 'Weights',
       'projection': [[['C']], [['M']], [['R']], [['S']]]},
      {'name': 'Inputs',
       'projection': [[['N']],
        [['C']],
        [['R', 'Wdilation'], ['P', 'Wstride']],
        [['S', 'Hdilation'], ['Q', 'Hstride']]]},
      {'name': 'Outputs',
       'projection': [[['N']], [['M']], [['Q']], [['P']]],
       'read-write': True}],
     'dimensions': ['C', 'M', 'R', 'S', 'N', 'P', 'Q'],
     'name': 'linear'}}},
  'num': 1,
  'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer1',
  'mapper_timeout': 15000,
  'mapper_algo':

In [5]:
# Per-layer figures for the eyeriss_like run: per-layer energy/cycle plus the
# totals scaled by the number of layers sharing the same entry ("num").
arch_results['eyeriss_like']['layer_results']

{1: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer1',
  'total_energy': 21.38,
  'total_cycle': 21504,
  'num': 1,
  'energy': 21.38,
  'cycle': 21504},
 2: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer2',
  'total_energy': 442.53999999999996,
  'total_cycle': 917504,
  'num': 7,
  'energy': 63.22,
  'cycle': 131072},
 6: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer6',
  'total_energy': 107.75,
  'total_cycle': 118784,
  'num': 1,
  'energy': 107.75,
  'cycle': 118784},
 9: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer9',
  'total_energy': 5.64,
  'total_cycle': 4096,
  'num': 1,
  'energy': 5.64,
  'cycle': 4096},
 11: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer11',
  'total_energy': 631.07,
  'total_cycle': 72448,
  'num': 1,
  'energy': 631.07,
  'cycle': 72448},
 12: {'name': '/home/workspace/notebooks/workloads/nerf/nerf_layer12',
  'total_energy': 3.07,
  'total_cycle': 2048,
  'num': 1,
  'energy':

## pytorch2timeloop layer mapping info
Match each layer ID in the results above against the mapped NeRF layers.

In [6]:
def natural_sort(l):
    """Return a new list of the strings in ``l`` sorted in natural order.

    Runs of ASCII digits are compared numerically and everything else is
    compared case-insensitively, so ``"layer2"`` sorts before ``"layer10"``.

    Args:
        l: Iterable of strings to sort.

    Returns:
        A new sorted list; the input is not modified.
    """
    def alphanum_key(key):
        # re.split with a capturing group alternates text / digit-run chunks,
        # so odd positions are always ASCII digit runs and even positions are
        # always text (possibly empty). Deciding by position instead of
        # str.isdigit() avoids int() failures on characters such as
        # superscript digits and keeps int/str positions aligned across keys.
        chunks = re.split('([0-9]+)', key)
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(chunks)
        ]

    return sorted(l, key=alphanum_key)

# Directory holding the layer description files listed below.
nerf_layer_dir = "workloads/nerf"
for layer_file in natural_sort(os.listdir(nerf_layer_dir)):
    layer_path = os.path.join(nerf_layer_dir, layer_file)

    # Only regular .yaml files are layer descriptions; skip anything else
    # (e.g. sub-directories), which would otherwise crash open()/safe_load.
    if not (layer_file.endswith(".yaml") and os.path.isfile(layer_path)):
        continue

    with open(layer_path, "r") as f:
        layer_config = yaml.safe_load(f)

    # C, M, N are the problem-instance dimensions; presumably input features,
    # output features and batch size (layer 1 shows C=63, M=256, N=128) -
    # TODO confirm against pytorch2timeloop.
    instance = layer_config['problem']['instance']
    C = instance['C']
    M = instance['M']
    N = instance['N']
    print(f"{layer_file}, C={C}, M={M}, N={N}")

nerf_layer1.yaml, C=63, M=256, N=128
nerf_layer2.yaml, C=256, M=256, N=128
nerf_layer3.yaml, C=256, M=256, N=128
nerf_layer4.yaml, C=256, M=256, N=128
nerf_layer5.yaml, C=256, M=256, N=128
nerf_layer6.yaml, C=319, M=256, N=128
nerf_layer7.yaml, C=256, M=256, N=128
nerf_layer8.yaml, C=256, M=256, N=128
nerf_layer9.yaml, C=256, M=1, N=128
nerf_layer10.yaml, C=256, M=256, N=128
nerf_layer11.yaml, C=283, M=128, N=128
nerf_layer12.yaml, C=128, M=3, N=128
