In [1]:
import os, sys
os.environ['ON_PY']="1"
from data_utils import CaseGenerator
import random
from gym_jobshop.envs.fjsp_env import FJSPEnv
from stable_baselines3 import PPO
from params import configs
from model.DAN_policy import DualAttentionNetwork_

In [2]:

def test(model_name, test_job_length, test_op_pt, config=configs, env_kwargs=None):
    """
    Test a trained PPO model on the FJSP environment.
    
    :param model_name: The name of the trained model to load.
    :param test_job_length: The job length for the test environment.
    :param test_op_pt: The operation processing times for the test environment.
    :param config: Configuration object required for model initialization.
    :param env_kwargs: Additional keyword arguments for the FJSP environment initialization.
    """
    # 设置环境参数
    if env_kwargs is None:
        env_kwargs = {"num_jobs": 3, "num_machines": 2}
    
    # 创建测试环境
    test_env = FJSPEnv(env_kwargs["num_jobs"], env_kwargs["num_machines"])
    obs = test_env.set_initial_data(test_job_length, test_op_pt)

    # 加载模型并初始化
    model = PPO.load(f"../{model_name}")
    print(f"Loaded model: {model_name}")

    # # 如果模型需要重新初始化
    model.policy.features_extractor = DualAttentionNetwork_(
        observation_space=test_env.observation_space,
        config=config
    ).to(config.device)

    # model.policy.actor = Actor(
    #     config.num_mlp_layers_actor,
    #     4 * config.layer_fea_output_dim[-1] + test_env.observation_space['pair_features'].shape[-1],
    #     config.hidden_dim_actor,
    #     1
    # ).to(config.device)

    # model.policy.critic = Critic(
    #     config.num_mlp_layers_critic,
    #     2 * config.layer_fea_output_dim[-1],
    #     config.hidden_dim_critic,
    #     1
    # ).to(config.device)


    # 初始化测试变量
    done = False
    total_reward = 0
    step_count = 0

    # 记录测试日志
    print("\n--- Test Start ---")


    while not done:
        # 使用模型预测动作
        action, _states = model.predict(obs, deterministic=True)

        # 环境执行动作
        obs, reward, terminated, truncated, info = test_env.step(action)
        done = terminated or truncated  # 判断是否结束

        # 更新统计信息
        total_reward += reward
        step_count += 1

        # 日志记录当前步的信息
        if done: print(info)
        else: print(f"Step: {step_count}, Action: {action}, Reward: {reward}, Makespan: {info.get('makespan', 0)}")


    # 输出最终结果
    print("\n--- Test Completed ---")
    print(f"Total steps: {step_count}")
    print(f"Total reward: {total_reward}")

    # 渲染环境（可选）
    test_env.render()


In [3]:
n_j=14
n_m=11
op_per_job_min = 10
op_per_job_max = 20
case = CaseGenerator(n_j, n_m, op_per_job_min, op_per_job_max, 
                        flag_same_opes=False)
JobLength, OpPT, _ = case.get_case(0)
print(JobLength)
print(OpPT)
test("ppo_fjsp", JobLength, OpPT, config=configs, env_kwargs={"num_jobs": n_j, "num_machines": n_m})

203
[14 16 12 20 13 12 16 15 17 11 10 13 17 17]
[[ 0 14 17 ...  0 17 13]
 [ 0  4  4 ...  4  4  0]
 [12  8  8 ... 10  0  0]
 ...
 [ 7  0  5 ...  0  5  7]
 [ 0  0  0 ...  0 17  0]
 [ 0  0  0 ...  0  0  0]]
Loaded model: ppo_fjsp

--- Test Start ---


ValueError: Error: Unexpected observation shape (308, 10) for Box environment, please use (30, 10) or (n_env, 30, 10) for the observation shape.