# LLM Inference System Simulation
This notebook reproduces the logic and results of main.py, including simulation setup, single run, Gantt chart, parameter sweep, and PP-T sweep visualizations.

<div align="center">
<span style="color:red">&#9888; This Jupyter notebook has been tested in VS Code (with the Jupyter extension installed). &#9888;</span>
</div>

In [9]:
!pip install nbformat
!pip install dash



In [10]:
# Import required modules and visualizers
from simulator import CommNetworkSimulator, GPU, LLM
from GanttVisualizer import GanttVisualizer
from MTSweepVisualizer import MTSweepVisualizer
from PPTSweepVisualizer import PPTSweepVisualizer
from DisaggregatedPDSystemPP import DisaggregatedPDSystemPP

In [11]:
# Shared base configuration: reused by the single run and by both sweeps below.
system_config = dict(
    llm=LLM.from_name("LLaMA-3.1-70B"),
    prefill_gpu=GPU.from_name("NVIDIA H100"),
    decode_gpu=GPU.from_name("NVIDIA RTX PRO 6000"),
    pp_degree=8,
    num_prefill_ib_cards=1,
    N=1,
    vram_limit_ratio=0.95,
)

In [12]:
# 1. Single Run
# Build this run's configuration from the shared one plus T and M, leaving
# system_config itself untouched for the later sweep cells.
run_config = {**system_config, "T": 8192, "M": 128}
pd_system = DisaggregatedPDSystemPP(**run_config)
sim = CommNetworkSimulator()
pd_system.start(sim)
sim.run(pd_system)

ttds = pd_system.calculate_ttds(sim)
tpot = pd_system.calculate_tpot(sim)

# Either metric may be None (see the else-branch message), so only add them
# once both are known; adding unconditionally would raise TypeError before
# the "no decode jobs" message could ever be printed.
has_decode_metrics = ttds is not None and tpot is not None
ttft = ttds + tpot if has_decode_metrics else None

if has_decode_metrics:
    print(f"TTDS (Time To Decode Start): {ttds:.6f} seconds")
    print(f"TPOT (Time Per Output Token): {tpot:.6f} seconds")
    print(f"TTFT (Time To First Token): {ttft:.6f} seconds")
else:
    print("No decode jobs found for TTDS/TPOT calculation.")
print(f"Total Inference Latency: {sim.current_time:.4f} seconds")
print(f"Prefill VRAM Utilization: {pd_system.prefill_vram_util:.2f}%")
print(f"Decode VRAM Utilization: {pd_system.decode_vram_util:.2f}%")

TTDS (Time To Decode Start): 0.469320 seconds
TPOT (Time Per Output Token): 0.106630 seconds
TTFT (Time To First Token): 0.575950 seconds
Total Inference Latency: 0.5760 seconds
Prefill VRAM Utilization: 20.92%
Decode VRAM Utilization: 17.44%


In [13]:
# Visualization
# Render the single run from the previous cell: the visualizer is built from
# that cell's `pd_system` and then generated from its `sim`.
# NOTE(review): the second positional argument (1.0) is not explained in this
# notebook — confirm its meaning against GanttVisualizer before changing it.
visualizer = GanttVisualizer(pd_system, 1.0)
visualizer.generate(sim)

In [14]:
# --- Parameter Sweep (3D Plot) ---
print("\n--- Running Parameter Sweep (3D Plot) ---")

# T values 1024, 2048, ..., 8192 (the stop bound is exclusive, hence 8193).
t_sweep_range = range(1024, 8193, 1024)

# Collect the sweep settings in one place, then hand them to the visualizer.
mt_sweep_kwargs = dict(
    system_cls=DisaggregatedPDSystemPP,
    base_config=system_config,
    t_range=t_sweep_range,
    m_start=64,
    m_step=128,
    m_end=2048,
)
sweeper = MTSweepVisualizer(**mt_sweep_kwargs)

# Run the sweep and write the 3D plot to an HTML file.
results = sweeper.run_sweep()
sweeper.plot_3d(results, output_file="M_T_TTDS_sweep_3d.html")


--- Running Parameter Sweep (3D Plot) ---
Starting parameter sweep along T=range(1024, 8193, 1024) with M start=64 step=128...
Grid has missing values, skipping surface plot.


In [15]:
# --- PP-T Sweep (3D Plot) ---
print("\n--- Running PP-T Sweep (3D Plot) ---")

# PP values 1..8 and T values 1024, 2048, ..., 8192 (stop bounds are exclusive).
pp_sweep_range = range(1, 8 + 1)
t_sweep_range = range(1024, 8193, 1024)

# Collect the sweep settings in one place; M is held fixed at 64.
ppt_sweep_kwargs = dict(
    system_cls=DisaggregatedPDSystemPP,
    base_config=system_config,
    pp_range=pp_sweep_range,
    t_range=t_sweep_range,
    m_value=64,
)
ppt_sweeper = PPTSweepVisualizer(**ppt_sweep_kwargs)

# Run the sweep and write the 3D plot to an HTML file.
ppt_results = ppt_sweeper.run_sweep()
ppt_sweeper.plot_3d(ppt_results, output_file="PP_T_TTDS_sweep_3d.html")


--- Running PP-T Sweep (3D Plot) ---
Starting parameter sweep along PP=range(1, 9) and T=range(1024, 8193, 1024) with fixed M=64...


## Interactive 6D Gantt Visualizer (PP, M, T, N)
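This cell launches the interactive 6D Gantt visualizer. It detects the environment and, when running inside a notebook (Jupyter or Colab), uses Dash's built-in Jupyter inline mode; otherwise it starts a standalone Dash server at http://127.0.0.1:8050.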

In [None]:
# Environment-aware launch of the 6D Gantt Visualizer
from utils_env import detect_environment
from Gantt6DVisualizer import Gantt6DVisualizer
from DisaggregatedPDSystemPP import DisaggregatedPDSystemPP
from simulator import GPU, LLM

# Defined again here so this cell can be run on its own, without first
# executing the configuration cell near the top of the notebook.
system_config = {
    "llm": LLM.from_name("LLaMA-3.1-70B"),
    "prefill_gpu": GPU.from_name("NVIDIA H100"),
    "decode_gpu": GPU.from_name("NVIDIA RTX PRO 6000"),
    "pp_degree": 8,
    "num_prefill_ib_cards": 1,
    "N": 1,
    "vram_limit_ratio": 0.95
}

env = detect_environment()
if env in ["jupyter", "colab"]:
    try:
        from dash import Dash  # Modern Dash supports Jupyter inline mode
    except ImportError:
        # Without Dash there is nothing to launch; report and stop here rather
        # than falling through to Dash(__name__) and failing with NameError.
        print("Dash is not installed. Please install it with 'pip install dash'.")
    else:
        try:
            # Swap in a fresh Dash app and rebuild the layout and callbacks on
            # it before serving inline in the notebook output.
            visualizer_6d = Gantt6DVisualizer(system_config)
            visualizer_6d.app = Dash(__name__)
            visualizer_6d._setup_layout()
            visualizer_6d._setup_callbacks()
            visualizer_6d.app.run(jupyter_mode='inline', debug=False, port=8050)
        except ImportError as exc:
            # Surface the underlying cause instead of hiding it behind a
            # generic message.
            print(f"Gantt6DVisualizer module is not installed or import failed: {exc}")
else:
    # Outside a notebook: run the visualizer's own server and tell the user
    # where to find it.
    print("Point your browser to http://127.0.0.1:8050")
    visualizer_6d = Gantt6DVisualizer(system_config)
    visualizer_6d.run(debug=False)
