In [1]:
from SPARS.Gym.utils import learn
from SPARS.Utils import get_logger, log_output
from SPARS.Simulator.Simulator import Simulator, run_simulation
from SPARS.Gym.gym import HPCGymEnv
from datetime import datetime
from RL_Agent.SPARS.agent import ActorCriticMLP
import torch as T

# Notebook-wide logger. The arguments request the name "runner", level INFO and
# the log file results/simulation.log; how get_logger applies them is defined in
# SPARS.Utils (presumably the results/ directory must already exist — TODO confirm).
logger = get_logger("runner", level="INFO", log_file="results/simulation.log")

In [2]:
# === Configuration ===
workload_path = "workloads/generated.json"  # Path to workload JSON file
platform_path = "platforms/generated.json"  # Path to platform JSON file
output_path = "results/generated"  # Path to output folder
algorithm = "fcfs_normal"  # Scheduling algorithm name
timeout = None  # Simulation timeout in seconds (optional)
# Time in format YYYY-MM-DD HH:MM:SS (optional). If not provided, starts at 0.
start_time = None
RL = True  # Reinforcement Learning mode

In [None]:
# === Initialize simulator ===
# Normalise `start_time` to an integer timestamp for the simulator.
#   * str  -> parsed as "YYYY-MM-DD HH:MM:SS" and converted to epoch seconds;
#             `human_readable` is set to True.
#   * None -> simulation starts at 0; `human_readable` is set to False.
# The check is on the *type* rather than `is not None` so that re-running this
# cell (when `start_time` has already been converted to an int by a previous
# run) is a no-op instead of raising TypeError inside `strptime`.
if isinstance(start_time, str):
    start_time = int(datetime.strptime(
        start_time, "%Y-%m-%d %H:%M:%S").timestamp())
    human_readable = True
elif start_time is None:
    start_time = 0
    human_readable = False
# Any other value is treated as already normalised; `start_time` and
# `human_readable` keep the values assigned by the earlier run of this cell.

# === Main ===
# Two modes, selected by the `RL` flag from the configuration cell:
#   * RL=True  -> build an actor-critic model, wrap the simulator in the gym
#                 environment and run `epoch` episodes, calling `learn` after
#                 every environment step; results are logged once at the end.
#   * RL=False -> run the plain simulator with the configured algorithm.
if RL:
    # === RL parameters ===
    learning_rate = 0.0003
    # Use the GPU when one is available, otherwise fall back to the CPU so the
    # notebook still runs on machines without CUDA.
    device = "cuda" if T.cuda.is_available() else "cpu"
    epoch = 10
    num_nodes = 16
    obs_dim = 11
    act_dim = 1

    model = ActorCriticMLP(obs_dim, act_dim, num_nodes, device)
    model_opt = T.optim.Adam(model.parameters(), lr=learning_rate)

    simulator = Simulator(workload_path, platform_path,
                          start_time, algorithm, rl=True)

    env = HPCGymEnv(simulator, device)

    for _ in range(epoch):
        # Start every episode from a freshly reset simulator.
        env.reset(workload_path, platform_path, start_time, algorithm)
        env.simulator.start_simulator()
        observation = env.get_observation()

        # Per-episode experience buffers; each list grows by one entry per step.
        memory_features = []
        memory_masks = []
        memory_actions = []
        memory_rewards = []
        # The tuple only holds references to the lists above, so it is built
        # once per episode and still reflects every later append.
        # Order is (actions, features, masks, rewards), as passed to `learn`.
        saved_experiences = (memory_actions, memory_features, memory_masks,
                             memory_rewards)

        while env.simulator.is_running:
            features_, mask_ = observation
            # Move the features to the model's device. The result must be
            # assigned back to `features_`; `.to()` does not work in place.
            features_ = features_.to(device)
            logits, values = model(features_)

            # Steps 1-3: model selects action, environment executes it
            next_observation, reward, done = env.step(logits)

            # Store detached copies so the buffers do not keep the autograd
            # graph of this step alive.
            memory_actions.append(logits.detach())
            memory_features.append(features_.detach())
            memory_masks.append(mask_.detach())
            memory_rewards.append(reward.detach())

            # `learn` is invoked after every step with the current `done` flag;
            # what it does with the buffers is defined in SPARS.Gym.utils.
            learn(model, model_opt, done,
                  saved_experiences, next_observation)

            observation = next_observation

    # Written once after all epochs, using the simulator state of the last one.
    log_output(env.simulator, output_path)
else:
    simulator = Simulator(workload_path, platform_path, start_time, algorithm)
    run_simulation(simulator, output_path)

2025-09-08 17:33:46 | INFO | runner:114 - Action taken: tensor([[0.6736]], device='cuda:0', grad_fn=<AddmmBackward0>)
2025-09-08 17:33:46 | INFO | runner:60 - Translated Actions: tensor([[10.5968]], device='cuda:0', grad_fn=<MulBackward0>) active nodes
2025-09-08 17:33:46 | INFO | runner:94 - [Time=0.00] type=simulation_start
2025-09-08 17:33:46 | INFO | runner:94 - [Time=0.00] type=switch_off nodes=[0, 1, 2, 3, 4]
2025-09-08 17:33:46 | INFO | runner:94 - [Time=5.00] type=turn_off nodes=[0, 1, 2, 3, 4]
2025-09-08 17:33:46 | INFO | runner:94 - [Time=5.00] type=CALL_RL
2025-09-08 17:33:46 | INFO | runner:90 - total_waste=11775.0000, total_wait=0.0000, reward=-1177.5000
2025-09-08 17:33:46 | INFO | runner:114 - Action taken: tensor([[-51.6885]], device='cuda:0', grad_fn=<AddmmBackward0>)
2025-09-08 17:33:46 | INFO | runner:60 - Translated Actions: tensor([[5.7028e-22]], device='cuda:0', grad_fn=<MulBackward0>) active nodes
2025-09-08 17:33:46 | INFO | runner:94 - [Time=5.00] type=switch_o

: 