# Profile NeRF with Timeloop and Accelergy

In [1]:
# Make the parent directory importable so that `accelerating_nerfs` can be found
import sys

sys.path += ["../"]

In [2]:
import json
import numpy as np
import os
import pandas as pd
import re
import yaml
import traceback

from collections import defaultdict
from accelerating_nerfs.models import VanillaNeRF, patch_forward

# Custom code
from analysis import *
from profiler import Profiler
from notebook_utils import natural_sort

SIN COUNT 256


  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  warn(f"Failed to load image Python extension: {e}")


### Configure saving of profiling results
This isn't important so you can ignore the details.

In [3]:
# Directory that receives the saved profiling results (created on first run)
results_dir = "dot-product_profile_results"
os.makedirs(results_dir, exist_ok=True)

# Per-architecture profiling results are collected here as the notebook runs
profile_results = dict()


def save_results():
    """Write the contents of `profile_results` to JSON files in `results_dir`.

    Each architecture's (very long) "results" entry is dumped to its own
    `<arch>_results.json` file. The remaining, shorter entries are gathered
    into a single `results.json` index, in which every architecture's
    "results" key holds the absolute path of that separate file.
    """
    index = {}

    for arch_name, arch_data in profile_results.items():
        # The detailed results are huge, so they get a dedicated file
        detail_path = os.path.join(results_dir, f"{arch_name}_results.json")
        with open(detail_path, "w") as detail_file:
            json.dump(arch_data["results"], detail_file, indent=4)

        # Keep the compact entries and replace "results" with a pointer to the
        # dedicated file (re-inserted last, so it stays the final key)
        entry = dict(arch_data)
        entry.pop("results")
        entry["results"] = os.path.abspath(detail_path)
        index[arch_name] = entry

    index_path = os.path.join(results_dir, "results.json")
    with open(index_path, "w") as index_file:
        json.dump(index, index_file, indent=4)

    print(f"Saved profile results to {index_path}")

## Profile using Timeloop and Accelergy
The 'No handlers found' messages printed while profiling can most likely be ignored safely.

To rerun the profiling from scratch, delete the existing results in the `dot-product_profiled_libs/` directory.

In [4]:
# Don't use simba_like or simple_output_stationary as the mapper constraints are too stringent
# Each entry is (architecture name, whether to use the sparse workload directory)
archs_and_sparse = [
#     ("simple_weight_stationary", False),
#     ("simple_output_stationary", False),
    ("eyeriss_like", False),
    ("eyeriss_like_onchip_compression", True),
    ("eyeriss_like_w_gating", True),
    # ("eyeriss_like_shen", True),
]
# Names of the architectures whose profiling run raised an exception
failed_archs = set()

for arch, is_sparse in archs_and_sparse:
    # Banner: the message framed by two '=' rules of matching width
    msg = f"Running {arch}"
    rule = "=" * len(msg)
    print(rule, msg, rule, sep="\n")

    # Sparse architectures read their workloads from a separate sub-directory
    workload_sub_dir = 'dot-product-sparse' if is_sparse else "dot-product"

    # Profile - you shouldn't need to change anything here
    try:
        profiler = Profiler(
            top_dir='workloads',
            sub_dir=workload_sub_dir,
            timeloop_dir=f"designs/{arch}",
            arch_name=arch,
            model=None,
            input_size=None,
            profiled_lib_dir_pattern="./dot-product_profiled_libs/{arch_name}_profiled_lib.json",
        )
        results, summary, layer_summary = profiler.profile()
    except Exception:
        # Record the failure and move on so the remaining architectures still run
        traceback.print_exc()
        print(f"ERROR: could not run profiler for {arch}, do not trust these results!")
        failed_archs.add(arch)
        continue

    # Print summary information
    for metric, value in summary.items():
        print(f"{metric}: {value}")

    # Store this architecture's results and save after every successful run
    profile_results[arch] = dict(
        results=results,
        summary=summary,
        layer_summary=layer_summary,
    )
    save_results()

Running eyeriss_like
Loaded profiled lib from ./dot-product_profiled_libs/eyeriss_like_profiled_lib.json


running timeloop to get energy and latency...: 0it [00:00, ?it/s]


Saved profiled lib to ./dot-product_profiled_libs/eyeriss_like_profiled_lib.json
total_area: 0.0
total_energy: 0.04
total_cycle: 16.0
Saved profile results to dot-product_profile_results/results.json
Running eyeriss_like_onchip_compression
Loaded profiled lib from ./dot-product_profiled_libs/eyeriss_like_onchip_compression_profiled_lib.json
Sparse optimization enabled for layer 1
Sparse optimization enabled for layer 2
Sparse optimization enabled for layer 3
Sparse optimization enabled for layer 4
Sparse optimization enabled for layer 5
Sparse optimization enabled for layer 6


running timeloop to get energy and latency...: 100%|██████████| 6/6 [00:25<00:00,  4.19s/it]


Saved profiled lib to ./dot-product_profiled_libs/eyeriss_like_onchip_compression_profiled_lib.json
total_area: 0.0
total_energy: 0.18
total_cycle: 60.0
Saved profile results to dot-product_profile_results/results.json
Running eyeriss_like_w_gating
Loaded profiled lib from ./dot-product_profiled_libs/eyeriss_like_w_gating_profiled_lib.json
Sparse optimization enabled for layer 1
Sparse optimization enabled for layer 2
Sparse optimization enabled for layer 3
Sparse optimization enabled for layer 4
Sparse optimization enabled for layer 5
Sparse optimization enabled for layer 6


running timeloop to get energy and latency...: 100%|██████████| 6/6 [00:19<00:00,  3.26s/it]

Saved profiled lib to ./dot-product_profiled_libs/eyeriss_like_w_gating_profiled_lib.json
total_area: 0.0
total_energy: 0.18
total_cycle: 61.0
Saved profile results to dot-product_profile_results/results.json





### <span style="color: red">Analyze detailed results in "Analyze Results.ipynb"</span>