In [4]:
from SPARS.Gym.utils import learn
from SPARS.Utils import get_logger, log_output
from SPARS.Simulator.Simulator import Simulator, run_simulation
from SPARS.Gym.gym import HPCGymEnv
from datetime import datetime
from RL_Agent.SPARS.agent import ActorCriticMLP
import torch as T

# Logger named "runner", requested at level INFO with its log file under results/.
logger = get_logger(
    "runner",
    level="INFO",
    log_file="results/simulation.log",
)

In [5]:
# === Configuration ===
workload_path = "workloads/generated.json"  # Path to workload JSON file
platform_path = "platforms/generated.json"  # Path to platform JSON file
output_path = "results/generated"  # Path to output folder
algorithm = "fcfs_normal"  # Scheduling algorithm name
timeout = None  # Simulation timeout in seconds (optional)
# Time in format YYYY-MM-DD HH:MM:SS (optional). If not provided, starts at 0.
start_time = None
RL = False  # Reinforcement Learning mode

In [6]:
# === Initialize simulator ===
# Normalise `start_time` into the integer value handed to the Simulator.
# The isinstance guard keeps this cell re-runnable: after a first run
# `start_time` is already an int, and an unconditional strptime() call on it
# would raise TypeError.
if isinstance(start_time, str):
    # Parsed as a naive datetime, so .timestamp() interprets it in the
    # local timezone of the machine running the notebook.
    start_time = int(datetime.strptime(
        start_time, "%Y-%m-%d %H:%M:%S").timestamp())
    human_readable = True
elif start_time is None:
    # No start time configured: simulation time starts at 0.
    start_time = 0
    human_readable = False
# Otherwise `start_time` was already converted by an earlier run of this cell;
# `start_time` and `human_readable` keep the values set by that run.


# === Main ===
# RL mode: step the simulator through a gym-style environment for `epoch`
# episodes, calling `learn` after every step, then write the results.
# Otherwise: run the configured scheduling algorithm once via run_simulation.
if RL:
    # === RL parameters ===
    learning_rate = 0.0003
    # Use the GPU when one is available; fall back to CPU so the notebook
    # still runs on machines without CUDA.
    device = "cuda" if T.cuda.is_available() else "cpu"
    epoch = 10
    num_nodes = 16
    obs_dim = 11
    act_dim = 1

    model = ActorCriticMLP(obs_dim, act_dim, num_nodes, device)
    model_opt = T.optim.Adam(model.parameters(), lr=learning_rate)

    simulator = Simulator(workload_path, platform_path,
                          start_time, algorithm, rl=True, timeout=timeout)

    env = HPCGymEnv(simulator, device)

    for _ in range(epoch):
        # Re-initialise the environment from the same inputs for each episode.
        env.reset(workload_path, platform_path, start_time, algorithm)
        env.simulator.start_simulator()
        observation = env.get_observation()

        # Per-episode experience buffers; emptied at the start of every epoch.
        memory_features = []
        memory_masks = []
        memory_actions = []
        memory_rewards = []
        while env.simulator.is_running:
            features_, mask_ = observation
            # Tensor.to() returns a new tensor, so the result must be bound
            # back to `features_`; the original bound it to a misspelled name
            # and the model received the unmoved tensor.
            features_ = features_.to(device)
            logits, values = model(features_)

            next_observation, reward, done = env.step(logits)

            # The raw logits are what gets stored as the "action".
            memory_actions.append(logits.detach())
            memory_features.append(features_.detach())
            memory_masks.append(mask_.detach())
            memory_rewards.append(reward.detach())

            # NOTE(review): tuple order is (actions, features, masks, rewards);
            # confirm this matches what `learn` unpacks.
            saved_experiences = (memory_actions, memory_features, memory_masks,
                                 memory_rewards)

            # `learn` is called on every step with the buffers accumulated so
            # far in this episode.
            learn(model, model_opt, done,
                  saved_experiences, next_observation)

            observation = next_observation

    # Written once, after the final epoch has finished.
    log_output(env.simulator, output_path)
else:
    simulator = Simulator(workload_path, platform_path,
                          start_time, algorithm, timeout=timeout)
    run_simulation(simulator, output_path)

2025-09-09 19:52:30 | INFO | runner:94 - [Time=0.00] type=simulation_start
2025-09-09 19:52:30 | INFO | runner:94 - [Time=9.00] job_id=1 type=arrival res=5 subtime=9 reqtime=47 runtime=50 profile=100 user_id=0
2025-09-09 19:52:30 | INFO | runner:94 - [Time=9.00] job_id=1 type=execution_start subtime=9 runtime=50 reqtime=47 res=5 nodes=[0, 1, 2, 3, 4]
2025-09-09 19:52:30 | INFO | runner:94 - [Time=14.00] job_id=2 type=arrival res=3 subtime=14 reqtime=34 runtime=34 profile=100 user_id=0
2025-09-09 19:52:30 | INFO | runner:94 - [Time=14.00] job_id=2 type=execution_start subtime=14 runtime=34 reqtime=34 res=3 nodes=[5, 6, 7]
2025-09-09 19:52:30 | INFO | runner:94 - [Time=19.00] job_id=3 type=arrival res=5 subtime=19 reqtime=134 runtime=135 profile=100 user_id=0
2025-09-09 19:52:30 | INFO | runner:94 - [Time=19.00] job_id=3 type=execution_start subtime=19 runtime=135 reqtime=134 res=5 nodes=[8, 9, 10, 11, 12]
2025-09-09 19:52:30 | INFO | runner:94 - [Time=24.00] job_id=4 type=arrival res=3 