In [None]:
import sys
from pathlib import Path

# Make the directory two levels above the current working directory importable.
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
_parent_dir = str(Path.cwd().parent.parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [None]:
%load_ext autoreload
%autoreload 2

import gym_minigrid  # noqa: F401
import gymnasium as gym
from typing import List
from gymnasium.core import ActType
import specless as sl
from specless.minigrid.tspenv import TSPBenchmarkEnv

# Log directory named ".gymlog" under the current working directory.
GYM_MONITOR_LOG_DIR = Path.cwd() / ".gymlog"
print(GYM_MONITOR_LOG_DIR)

# gym.pprint_registry()

In [None]:
# Build the TSP benchmark grid world and wrap it so it can be turned into a
# transition system further down.
ENV_ID = "MiniGrid-TSPBenchmarkEnv-v0"
env_kwargs = {
    "num_locations": 5,
    "width": 20,
    "height": 20,
    "agent_start_pos": (1, 5),
}
env = sl.MiniGridTransitionSystemWrapper(gym.make(ENV_ID, **env_kwargs))

In [None]:
##### Convert a Transition System from an OpenAI Gym Environment (env)
tsbuilder = sl.TSBuilder()
transition_system = tsbuilder(env, "minigrid")

##### Convert the Transition System to a Traveling Salesman Problem
adapter = sl.TSPBuilder()
# TODO - 2: Create a Specification Class with just a list of nodes
tsp: sl.TSP = adapter(transition_system)

print(tsp)

##### Solve the TSP and obtain tours
tspsolver = sl.MILPTSPSolver()
tours, cost = tspsolver.solve(tsp)

print("Tours: ", tours)

##### Map the tours back onto the OpenAI Gym Environment to obtain a controller(s) (a sequence of actions)
# TODO: Include the actions -> strategy conversions into the adapter
# One entry per tour; each entry is handed to PlanStrategy as a whole below.
# NOTE(review): presumably each entry is a list of actions - confirm the return
# type of map_back_to_controls.
actions: List[List[ActType]] = [adapter.map_back_to_controls(tour) for tour in tours]
if not actions:
    # Explicit error instead of a bare `assert False`: it carries a message and is
    # not stripped when Python runs with -O.
    raise RuntimeError("The TSP solver returned no tours, so no strategy can be built.")

##### Convert a sequences of actions to a strategy class.
if len(actions) == 1:
    # A single tour needs only one plan-following strategy.
    strategy = sl.PlanStrategy(actions[0])
else:
    # Several tours: one plan per tour, bundled into a combined strategy.
    strategy = sl.CombinedStrategy([sl.PlanStrategy(plan) for plan in actions])

In [None]:
# For every observation known to the adapter, list the distinct first components
# of its states, in first-seen order.
# NOTE(review): each state unpacks into two fields, presumably (position, direction) - confirm.
for obs, states in adapter.obs_to_states.items():
    print(obs)
    seen_positions = set()
    for position, _direction in states:
        if position not in seen_positions:
            seen_positions.add(position)
            print(f"\t{position}")

# Reset the environment and show where the agent starts.
state, info = env.reset()
print("Initial State: ", state["pos"], state["dir"])

In [None]:
# Wrap the environment in specless' TerminateIfNoStrategyWrapper before simulating.
# NOTE(review): presumably this ends the episode once the strategy has no action
# left to give - confirm against the specless implementation.
# The name `env` is rebound in place, so re-running this cell wraps it again.
env = sl.TerminateIfNoStrategyWrapper(env)

In [None]:
# TODO: Simply change it to the following using MultiAgentWrapper()
# states, actions = sl.simulate(env, strategy)
# print(states, actions)

# TODO: Implement MultiAgentWrapper
# This should hold multiple envs.
if isinstance(strategy, sl.CombinedStrategy):
    # Simulate each sub-strategy in turn on the same environment.
    for i, agent_strategy in enumerate(strategy.strategies):
        print(f"Agent {i+1}: ")
        states, actions = sl.simulate(env, agent_strategy)
        # Per-step views of the trajectory; not used further in this cell.
        obs = [step["observation"] for step in states]
        pos = [step["pos"] for step in states]
        states = [(step["pos"], step["dir"], step["observation"]) for step in states]
        # One state per line, each indented under the "Agent N:" header.
        # (The previous "\t\n" separator left a trailing tab instead of indenting.)
        print("\t" + "\n\t".join(map(str, states)))
else:
    # Single strategy: simulate once and dump the raw trajectory.
    states, actions = sl.simulate(env, strategy)
    print(states, actions)