# Williams-Style Space–Time Tradeoff: Bench Notebook
This notebook benchmarks your local `enhanced_transformer.py` module under three policies:

- **cache** (standard KV cache)
- **grouped** KV cache with fewer KV heads (saves cache memory)
- **recompute**: tiled attention (Williams tradeoff: recompute instead of storing, with the tile size bounded by Qc×Kc, set via `q_chunk` and `k_chunk`)

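For each policy and sequence length it records **runtime**, **peak GPU memory** (only when CUDA is available), and **ΔRSS** (the change in process resident set size) as the CPU-side memory measure.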

In [None]:
import sys, os
import torch
import pandas as pd
import matplotlib.pyplot as plt
from importlib import reload

# Make modules that live in the current working directory importable. The
# membership check keeps sys.path from gaining one duplicate entry every time
# this cell is re-run.
cwd = os.getcwd()
if cwd not in sys.path:
    sys.path.append(cwd)

import bench_williams as bw
import enhanced_transformer as et

# Re-execute both modules so on-disk edits made since the first import take
# effect. enhanced_transformer goes first: if bench_williams binds names from
# it at import time (assumption -- bench_williams is not visible here),
# reloading bench_williams afterwards rebinds them to the fresh definitions
# instead of keeping stale ones.
et = reload(et)
bw = reload(bw)

print('Device:', 'cuda' if torch.cuda.is_available() else 'cpu')
print('TransformerBlock exists:', hasattr(et, 'TransformerBlock'))

In [None]:
# Sequence lengths handed to the benchmark grid.
seq_list = [1024, 4096, 8192, 16384]

# One entry per policy under test. The extra keys (kv_heads, q_chunk, k_chunk)
# are passed through to bw.bench_grid as part of each config dict.
configs = [
    dict(name="cache", policy="cache"),
    dict(name="grouped(kv=2)", policy="grouped", kv_heads=2),
    dict(name="recompute(Q64,K256)", policy="recompute", q_chunk=64, k_chunk=256),
]

# Run the grid and collect whatever bench_grid returns into a DataFrame.
# NOTE(review): device=None presumably lets bench_grid pick the device -- confirm
# against bench_williams.
rows = bw.bench_grid(
    seq_list,
    configs,
    runs=3,
    d_model=256,
    heads=8,
    device=None,
    warmup=1,
)
df = pd.DataFrame(rows)

# Bare expression so the notebook renders the table as the cell output.
df

In [None]:
# Show the results table. caas_jupyter_tools may not be installed in every
# environment, so fall back to a plain-text dump instead of letting an
# ImportError stop the remaining cells from running.
try:
    from caas_jupyter_tools import display_dataframe_to_user
except ImportError:
    print('Williams Bench Results')
    print(df.to_string())
else:
    display_dataframe_to_user('Williams Bench Results', df)

In [None]:
# Runtime vs sequence length: one line per distinct value in df['name'].
ax = plt.figure().add_subplot(111)
for label in df['name'].unique():
    # Rows for this configuration only, ordered by sequence length so the
    # line is drawn left to right.
    series = df.loc[df['name'] == label].sort_values(by='seq')
    ax.plot(series['seq'], series['time_mean_s'], marker='o', label=label)
ax.set_xlabel('Sequence length')
ax.set_ylabel('Mean runtime (s)')
ax.set_title('Runtime vs Sequence Length')
ax.legend()
plt.show()

In [None]:
# Peak GPU memory vs sequence length; only drawn when CUDA is available.
if not torch.cuda.is_available():
    print('CUDA not available: skipping GPU memory plot')
else:
    ax = plt.figure().add_subplot(111)
    for label in df['name'].unique():
        # Rows for this configuration only, ordered by sequence length.
        series = df.loc[df['name'] == label].sort_values(by='seq')
        ax.plot(series['seq'], series['gpu_peak_MB'], marker='o', label=label)
    ax.set_xlabel('Sequence length')
    ax.set_ylabel('Peak GPU memory (MB)')
    ax.set_title('Peak GPU Memory vs Sequence Length')
    ax.legend()
    plt.show()

In [None]:
# Process RSS delta (MB) vs sequence length: one line per distinct value in df['name'].
ax = plt.figure().add_subplot(111)
for label in df['name'].unique():
    # Rows for this configuration only, ordered by sequence length.
    series = df.loc[df['name'] == label].sort_values(by='seq')
    ax.plot(series['seq'], series['rss_delta_MB'], marker='o', label=label)
ax.set_xlabel('Sequence length')
ax.set_ylabel('ΔRSS (MB)')
ax.set_title('Process RSS Delta vs Sequence Length')
ax.legend()
plt.show()