# Simulation Model for MARL

#### What are the machines and their tasks?

As a whole, the 3 machines need to complete as many(all?) of the product_requests in the total amount of steps. To complete a product_request, certain tasks need to be fulfilled like Ironing, Hardening, Sublimation, etc. The machines have task_modes whioch define what kind of tasks the machines can do. They are called task_modes because they are the current mode that the machine is in, and a machine can only run one task_mode at a time.

#### What do we want the machines/agents to predict?

When a machine is finished running a task_mode we want that machine to choose whether to stay idle or choose one of its possible task_modes to run for a product.

(action) = (job_id, task_mode_id)

OR

(action) = IDLE

#### What do we want to optimize?

We want to minimize the amount of time it takes to complete the products, maximize the amount of products made, and minimize the total amount of energy used to make these products.

### What kind of RL?

We will be doing CTDE for MARL with 3 agents that correspond to the 3 machines.

### Load Relevant Data

In [4]:
import pandas as pd
import json

import itertools
from collections import defaultdict

with open("data/Input_JSON_Schedule_Optimization.json") as f:
    data = json.load(f)


# we need a dictionary that maps task_mode_id to its power consumption list

task_mode_power_dict = {
    mode["id"]: mode["power"]
    for mode in data["task_modes"]
}
print(f"Task Mode to Power List Dictionary: {task_mode_power_dict}")

task_to_task_modes_dict = {
    task["id"]: task["task_modes"]
    for task in data["tasks"]
}
print(f"Task to Task Modes Dictionary: {task_to_task_modes_dict}")

mode_to_machines = defaultdict(set)
for m in data["machines"]:
    mid = m["id"]
    for mode_id in m["task_modes"]:
        mode_to_machines[mode_id].add(mid)
print(f"Mode to Machines Dictionary: {mode_to_machines}")

task_to_machines = {
    task: set(itertools.chain.from_iterable(mode_to_machines[mode] for mode in modes))
    for task, modes in task_to_task_modes_dict.items()
}
print(f"Task to Machines Dictionary: {task_to_machines}")

Task Mode to Power List Dictionary: {'Ironing TM1': [66, 258, 337, 285, 228, 227, 266, 354, 1194], 'Ironing TM3': [672, 1178, 884, 625, 621, 577, 560, 557, 311], 'Ironing TM4': [1109, 611, 1025, 643, 910, 695, 1654, 220, 1204, 210, 1129, 221], 'Harden[0.5] TM1': [1782, 1224, 1355, 1266, 1254, 1256, 1223, 1207, 1225, 1183, 1317, 187], 'Harden[0.5] TM4': [654, 785, 86, 53, 0], 'Harden[1] TM1': [1184, 861, 9, 223, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 50, 38], 'Harden[1] TM2': [1167, 1167, 1167, 1196, 1155], 'Harden[1] TM5': [1687, 1681, 1674, 1665, 800, 1665, 757, 1539, 617], 'Harden[1.5] TM1': [1269, 1224, 1237, 1242, 1237, 1199, 1206, 1202, 1210, 1208, 1199, 1199, 1175, 1292, 483, 0, 0, 0, 0, 0, 0, 0, 0, 481, 1122], 'Harden[1.5] TM3': [510, 1624, 520, 1631, 506, 1628, 503, 3272, 922, 1213, 924, 1206, 948, 1201, 946], 'Harden[2] TM1': [724, 1118, 651, 1178, 578, 1265, 519, 1305, 502, 1331, 478, 1339, 456, 1356, 432, 1397, 408, 1420, 372, 1471, 155, 84, 66], 'Sublimation TM3': [78, 1029, 424, 3

In [None]:
import itertools
from collections import defaultdict

# ---------- 1) Index helpers from dataset ----------

# task -> [mode_ids]
task_to_modes = {t["id"]: list(t["task_modes"]) for t in data["tasks"]}

# mode_id -> power sequence
mode_power = {m["id"]: list(m["power"]) for m in data["task_modes"]}

# mode_id -> set(machines that can run it)
mode_to_machines = defaultdict(set)
for m in data["machines"]:
    mid = m["id"]
    for mode_id in m["task_modes"]:
        mode_to_machines[mode_id].add(mid)

# task -> set(machines that can run the task via any of its modes)
task_to_machines = {
    task: set(itertools.chain.from_iterable(mode_to_machines[mode] for mode in modes))
    for task, modes in task_to_modes.items()
}

# Constraints, kept for the environment to enforce later
order_constraints = []   # list of (first_task, second_task)
collision_constraints = set()  # set of frozenset({task1, task2})

for c in data.get("constraints", []):
    if c["type"] == "Order":
        order_constraints.append((c["parameter"]["first_task"], c["parameter"]["second_task"]))
    elif c["type"] == "Collision":
        t1 = c["parameter"]["task1"]; t2 = c["parameter"]["task2"]
        collision_constraints.add(frozenset({t1, t2}))


# ---------- 2) Build product recipes ----------
# product_id -> ordered list of (task_id, runs)
product_recipes = {}
for p in data["products"]:
    product_recipes[p["id"]] = [(x["task"], int(x["runs"])) for x in p["tasks"]]


# ---------- 3) Expand into concrete jobs (one per unit requested) ----------
jobs = []
job_counter = 1

for req in data["product_requests"]:
    product_id = req["product"]
    amount = int(req["amount"])
    deadline = req.get("deadline")  # may be absent

    # The route is copied per job so we can track progress independently
    recipe = product_recipes[product_id]

    for _ in range(amount):
        job_id = f"JOB{job_counter:04d}"
        job_counter += 1

        # Per-job route with progress fields
        route = []
        for task_id, runs in recipe:
            route.append({
                "task_id": task_id,
                "runs_required": runs,
                "runs_done": 0,
                "modes": list(task_to_modes[task_id]),               # allowed mode IDs for this task
                "eligible_machines": list(task_to_machines[task_id]) # machines that can run any mode of this task
            })

        jobs.append({
            "job_id": job_id,
            "product_id": product_id,
            "deadline": deadline,        # None if not provided
            "current_stage": 0,          # index in route; advance when runs_done == runs_required
            "route": route,              # ordered stages to complete
            "is_done": False,
        })

# ---------- 4) (Optional) Tiny peek at what you built ----------
def summarize_job(j):
    r = j["route"][j["current_stage"]]
    return {
        "job_id": j["job_id"],
        "product": j["product_id"],
        "deadline": j["deadline"],
        "stage_idx": j["current_stage"],
        "stage_task": r["task_id"],
        "stage_runs_done/req": f'{r["runs_done"]}/{r["runs_required"]}',
        "modes_available": r["modes"],
        "eligible_machines": r["eligible_machines"],
    }

for j in jobs:
    print(summarize_job(j))


{'job_id': 'JOB0001', 'product': 'WOVEN LABEL OURELA FABRIC', 'deadline': None, 'stage_idx': 0, 'stage_task': 'Ironing', 'stage_runs_done/req': '0/1', 'modes_available': ['Ironing TM1', 'Ironing TM3', 'Ironing TM4'], 'eligible_machines': ['MAQ120', 'MAQ118', 'MAQ119']}
{'job_id': 'JOB0002', 'product': 'WOVEN LABEL OURELA FABRIC', 'deadline': None, 'stage_idx': 0, 'stage_task': 'Ironing', 'stage_runs_done/req': '0/1', 'modes_available': ['Ironing TM1', 'Ironing TM3', 'Ironing TM4'], 'eligible_machines': ['MAQ120', 'MAQ118', 'MAQ119']}
{'job_id': 'JOB0003', 'product': 'WOVEN LABEL OURELA FABRIC', 'deadline': None, 'stage_idx': 0, 'stage_task': 'Ironing', 'stage_runs_done/req': '0/1', 'modes_available': ['Ironing TM1', 'Ironing TM3', 'Ironing TM4'], 'eligible_machines': ['MAQ120', 'MAQ118', 'MAQ119']}
{'job_id': 'JOB0004', 'product': 'WOVEN LABEL OURELA FABRIC', 'deadline': None, 'stage_idx': 0, 'stage_task': 'Ironing', 'stage_runs_done/req': '0/1', 'modes_available': ['Ironing TM1', 'Iro

#### Simulation Rules

In [5]:
TOTAL_STEPS = 1152  # total steps in the simulation (6 days * 16 hours * (60/5) minutes)

class MachineRuntime:
    def __init__(self):
        self.busy = False # is the machine currently running a task_mode
        self.job_id = None  # which job the machine is working on
        self.task_mode_id = None # which task_mode the machine is running
        self.remaining_task_mode_steps = 0 # remaining steps for the current task_mode
        self.task_mode_step = 0  # The "step"/index of the current task_mode

    def start_task_mode(self, job_id, task_mode_id):
        """
        Start a new task_mode on this machine.
        """
        self.busy = True
        self.job_id = job_id
        self.task_mode_id = task_mode_id

        # Get the power list for each step this task_mode
        power_list = task_mode_power_dict[task_mode_id] 
        self.remaining_task_mode_steps = len(power_list)

        self.task_mode_step = 0 # indexes through the power list

        
    def step_power(self, power_seq):
        """
        Return power consumption for this step.
        If we're in a task_mode, 
        """
        if not self.busy:
            return 0 
        
        power_list = task_mode_power_dict[self.task_mode_id]
        power = power_list[self.task_mode_step]  # get current step power
        
        self.task_mode_step += 1
        self.remaining_task_mode_steps -= 1
        if self.remaining_task_mode_steps == 0:
            # task is complete
            self._reset()
        return power
    
    def _reset(self):

        self.busy = False
        self.job_id = None
        self.task_mode_id = None
        self.remaining_task_mode_steps = 0
        self.task_mode_step = 0

In [6]:
# Example usage

m = MachineRuntime()
m.start_task_mode("JOB1", "Ironing TM1")
while m.busy:
    power = m.step_power(None)
    print(f"Step power: {power}")

Step power: 66
Step power: 258
Step power: 337
Step power: 285
Step power: 228
Step power: 227
Step power: 266
Step power: 354
Step power: 1194
