In [1]:
from agilerl.utils.utils import create_population
from agilerl.vector.pz_async_vec_env import AsyncPettingZooVecEnv
import torch
import supersuit as ss

from mcrafter import Env
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Network configuration shared by population creation, mutation and training.
# The channel/kernel/stride lists each hold three entries — presumably one per
# convolutional layer; confirm against the AgileRL CNN documentation.
NET_CONFIG = dict(
    arch="cnn",  # Network architecture
    hidden_size=[128, 64],
    channel_size=[32, 64, 64],
    kernel_size=[8, 4, 3],
    stride_size=[4, 2, 1],
    normalize=True,  # Normalize image from range [0,255] to [0,1]
)

# Initial hyperparameters for the MADDPG population. Later cells add further
# keys (action bounds, agent count, agent IDs) to this same dictionary.
INIT_HP = dict(
    # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
    CHANNELS_LAST=True,
    BATCH_SIZE=64,  # Batch size
    # --- Exploration noise ---
    O_U_NOISE=True,  # Ornstein Uhlenbeck action noise
    EXPL_NOISE=0.15,  # Action noise scale
    MEAN_NOISE=0.0,  # Mean action noise
    THETA=0.15,  # Rate of mean reversion in OU noise
    DT=0.01,  # Timestep for OU noise
    # --- Optimisation ---
    LR_ACTOR=0.001,  # Actor learning rate
    LR_CRITIC=0.001,  # Critic learning rate
    GAMMA=0.995,  # Discount factor
    MEMORY_SIZE=200000,  # Max memory buffer size
    LEARN_STEP=32,  # Learning frequency
    TAU=0.01,  # For soft update of target parameters
    POLICY_FREQ=2,  # Policy frequency
    # --- Evolution ---
    POP_SIZE=2,  # Population size
)

num_envs = 8


def make_env():
    """Build one freshly wrapped two-player mcrafter environment.

    Each call constructs a new ``Env`` and applies, in order, SuperSuit's
    ``color_reduction_v0`` (mode ``'full'``), ``black_death_v3`` and
    ``frame_stack_v1`` (stack size 4).

    Returns:
        The wrapped environment instance.
    """
    # NOTE(review): render_mode='human' is kept from the original; with
    # several vectorised workers this may open one window per worker — confirm
    # that this is intended for training.
    base_env = Env(n_players=2, render_mode='human')
    base_env = ss.color_reduction_v0(base_env, 'full')
    base_env = ss.black_death_v3(base_env)
    return ss.frame_stack_v1(base_env, 4)


# Define the mcrafter environment as a vectorised parallel environment.
# Every entry is a factory that builds its own environment, so no factory
# depends on the global name `env`, which is rebound to the vector env here.
env = AsyncPettingZooVecEnv([make_env for _ in range(num_envs)])
env.reset()

# Configure the multi-agent algo input arguments.
# Look every agent's spaces up once so both branches below share them.
observation_spaces = [env.single_observation_space(agent) for agent in env.agents]
action_spaces = [env.single_action_space(agent) for agent in env.agents]

# Observation spaces exposing `.n` are treated as discrete (one-hot encoded);
# otherwise the space's shape is used. Only the missing-attribute case is
# caught so that genuine environment errors are not silently swallowed.
try:
    state_dim = [space.n for space in observation_spaces]
    one_hot = True
except AttributeError:
    state_dim = [space.shape for space in observation_spaces]
    one_hot = False

# Same probe for the action spaces: `.n` means discrete actions with no
# bounds; otherwise the first shape entry and the per-agent high/low bounds
# are recorded in INIT_HP.
try:
    action_dim = [space.n for space in action_spaces]
    INIT_HP["DISCRETE_ACTIONS"] = True
    INIT_HP["MAX_ACTION"] = None
    INIT_HP["MIN_ACTION"] = None
except AttributeError:
    action_dim = [space.shape[0] for space in action_spaces]
    INIT_HP["DISCRETE_ACTIONS"] = False
    INIT_HP["MAX_ACTION"] = [space.high for space in action_spaces]
    INIT_HP["MIN_ACTION"] = [space.low for space in action_spaces]

# Image observations are channels-last; reorder each agent's shape from
# [H, W, C] to [C, H, W]. Skipped for one-hot observations, whose state_dim
# entries are plain integers and cannot be indexed.
if INIT_HP["CHANNELS_LAST"] and not one_hot:
    state_dim = [(shape[2], shape[0], shape[1]) for shape in state_dim]

# Record the agent count and agent IDs reported by the vectorised environment
# in the hyperparameter dictionary.
INIT_HP.update(N_AGENTS=env.num_agents, AGENT_IDS=env.agents)

# Build the initial MADDPG population used for evolutionary
# hyper-parameter optimisation.
pop = create_population(
    "MADDPG", state_dim, action_dim, one_hot, NET_CONFIG, INIT_HP,
    device=device,
    num_envs=num_envs,
    population_size=INIT_HP["POP_SIZE"],
)

In [2]:
from agilerl.components.multi_agent_replay_buffer import MultiAgentReplayBuffer

# Fields stored for every transition in the shared replay buffer.
field_names = [
    "state",
    "action",
    "reward",
    "next_state",
    "done",
]

# Replay buffer sized by INIT_HP["MEMORY_SIZE"] and keyed by the agent IDs.
memory = MultiAgentReplayBuffer(
    INIT_HP["MEMORY_SIZE"],
    device=device,
    agent_ids=INIT_HP["AGENT_IDS"],
    field_names=field_names,
)

In [3]:
from agilerl.hpo.tournament import TournamentSelection

# Tournament selection over the whole population: tournaments of two, elitism
# enabled, and fitness taken from the single most recent evaluation.
tournament = TournamentSelection(
    population_size=INIT_HP["POP_SIZE"],
    tournament_size=2,
    elitism=True,
    eval_loop=1,
)

In [4]:
from agilerl.hpo.mutation import Mutations

# Mutation operator for the MADDPG population.
mutations = Mutations(
    algo="MADDPG",
    arch=NET_CONFIG["arch"],
    agent_ids=INIT_HP["AGENT_IDS"],
    device=device,
    rand_seed=1,
    # --- Mutation probabilities ---
    no_mutation=0.2,  # no mutation
    architecture=0.2,  # architecture mutation
    new_layer_prob=0.2,  # new layer mutation
    parameters=0.2,  # parameter mutation
    activation=0,  # activation function mutation (disabled)
    rl_hp=0.2,  # RL hyperparameter mutation
    # RL hyperparameters eligible for mutation
    rl_hp_selection=["lr", "learn_step", "batch_size"],
    mutation_sd=0.1,  # Mutation strength
)

In [5]:
from agilerl.training.train_multi_agent import train_multi_agent
import gymnasium as gym
import torch

# Run evolutionary MADDPG training; returns the trained population and its
# fitness history.
trained_pop, pop_fitnesses = train_multi_agent(
    # --- What to train ---
    env=env,  # Pettingzoo-style environment
    env_name='mcrafter-2agents',  # Environment name
    algo="MADDPG",  # Algorithm
    pop=pop,  # Population of agents
    memory=memory,  # Replay buffer
    # --- Configuration ---
    INIT_HP=INIT_HP,  # INIT_HP dictionary
    net_config=NET_CONFIG,  # Network configuration
    swap_channels=INIT_HP['CHANNELS_LAST'],  # Swap image channel from last to first
    # --- Schedule ---
    max_steps=350_000,  # Max number of training steps
    learning_delay=1000,  # Steps before starting learning
    evo_steps=5000,  # Evolution frequency
    eval_steps=None,  # Number of steps in evaluation episode
    eval_loop=1,  # Number of evaluation episodes
    target=4000.,  # Target score for early stopping
    # --- Evolution and logging ---
    tournament=tournament,  # Tournament selection object
    mutation=mutations,  # Mutations object
    wb=False,  # Weights and Biases tracking
)


Training...


  1%|1         | 5000/350000 [  00:51<  56:26, 101.89step/s]


DateTime, now, H:m:s-u 22 : 10 : 11 - 10192
Steps 148.1708393912262 per sec, 8890.250363473571 per min.

                --- Global Steps 10000 ---
                Fitness:	['1356.00', '1356.00']
                Score:		[1113.875, 1317.375]
                5 fitness avgs:	['1356.00', '1356.00']
                10 score avgs:	['1317.38', '1317.38']
                Agents:		[1, 2]
                Steps:		[5000, 5000]
                Mutations:	['arch', 'param']
                

  3%|2         | 10000/350000 [  01:40<  53:01, 106.86step/s]


DateTime, now, H:m:s-u 22 : 10 : 57 - 297448
Steps 175.78257879544648 per sec, 10546.95472772679 per min.

                --- Global Steps 20000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1356.0, 1410.5]
                5 fitness avgs:	['1695.00', '1695.00']
                10 score avgs:	['1395.00', '1395.00']
                Agents:		[2, 3]
                Steps:		[10000, 10000]
                Mutations:	['lr_actor', 'None']
                

  4%|4         | 15000/350000 [  02:40<  52:04, 107.22step/s]


DateTime, now, H:m:s-u 22 : 11 : 51 - 56692
Steps 179.06582103879012 per sec, 10743.949262327407 per min.

                --- Global Steps 30000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1988.0, 2010.0]
                5 fitness avgs:	['1808.00', '1808.00']
                10 score avgs:	['1879.20', '1861.60']
                Agents:		[3, 4]
                Steps:		[15000, 15000]
                Mutations:	['param', 'param']
                

  6%|5         | 20000/350000 [  03:40<  55:50, 98.48step/s] 


DateTime, now, H:m:s-u 22 : 12 : 43 - 437828
Steps 181.886557794268 per sec, 10913.19346765608 per min.

                --- Global Steps 40000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1961.375, 1979.875]
                5 fitness avgs:	['1864.50', '1864.50']
                10 score avgs:	['1989.70', '1989.70']
                Agents:		[4, 5]
                Steps:		[20000, 20000]
                Mutations:	['arch', 'None']
                

  7%|7         | 25000/350000 [  04:12<  51:12, 105.77step/s]


DateTime, now, H:m:s-u 22 : 13 : 31 - 212637
Steps 186.78174376465927 per sec, 11206.904625879557 per min.

                --- Global Steps 50000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2021.0, 2033.5]
                5 fitness avgs:	['1898.40', '1898.40']
                10 score avgs:	['1990.30', '1990.30']
                Agents:		[5, 6]
                Steps:		[25000, 25000]
                Mutations:	['arch', 'param']
                

  9%|8         | 30000/350000 [  05:00<  49:33, 107.61step/s]


DateTime, now, H:m:s-u 22 : 14 : 18 - 736136
Steps 190.34589976566537 per sec, 11420.753985939922 per min.

                --- Global Steps 60000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2016.5, 2005.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2011.40', '2020.00']
                Agents:		[6, 7]
                Steps:		[30000, 30000]
                Mutations:	['arch', 'None']
                

 10%|#         | 35000/350000 [  06:00<  48:06, 109.13step/s]


DateTime, now, H:m:s-u 22 : 15 : 6 - 274708
Steps 192.9681439836308 per sec, 11578.08863901785 per min.

                --- Global Steps 70000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2012.5, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '2034.00']
                Agents:		[7, 8]
                Steps:		[35000, 35000]
                Mutations:	['arch', 'None']
                

 11%|#1        | 40000/350000 [  06:50<  47:17, 109.24step/s]


DateTime, now, H:m:s-u 22 : 16 : 3 - 101210
Steps 190.6665452327601 per sec, 11439.992713965607 per min.

                --- Global Steps 80000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1987.75, 2008.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2013.40', '2013.40']
                Agents:		[8, 9]
                Steps:		[40000, 40000]
                Mutations:	['param', 'param']
                

 13%|#2        | 45000/350000 [  07:50<  51:15, 99.16step/s] 


DateTime, now, H:m:s-u 22 : 16 : 59 - 497611
Steps 189.08473241548492 per sec, 11345.083944929094 per min.

                --- Global Steps 90000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2052.5, 2020.375]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2013.70', '2039.40']
                Agents:		[9, 10]
                Steps:		[45000, 45000]
                Mutations:	['param', 'arch']
                

 14%|#4        | 50000/350000 [  08:40<  50:02, 99.93step/s]


DateTime, now, H:m:s-u 22 : 17 : 47 - 139972
Steps 190.97838859598704 per sec, 11458.703315759221 per min.

                --- Global Steps 100000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2011.25, 2034.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2049.40', '2049.40']
                Agents:		[10, 11]
                Steps:		[50000, 50000]
                Mutations:	['None', 'None']
                

 16%|#5        | 55000/350000 [  09:30<  47:45, 102.94step/s]


DateTime, now, H:m:s-u 22 : 18 : 41 - 160398
Steps 190.43006803050145 per sec, 11425.804081830087 per min.

                --- Global Steps 110000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 1975.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1987.40', '1987.40']
                Agents:		[11, 12]
                Steps:		[55000, 55000]
                Mutations:	['None', 'param']
                

 17%|#7        | 60000/350000 [  10:10<  46:48, 103.27step/s]


DateTime, now, H:m:s-u 22 : 19 : 28 - 89164
Steps 192.13260729522412 per sec, 11527.956437713447 per min.

                --- Global Steps 120000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2025.75, 2019.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1991.60', '1991.60']
                Agents:		[12, 13]
                Steps:		[60000, 60000]
                Mutations:	['None', 'lr_critic']
                

 19%|#8        | 65000/350000 [  11:10<  44:21, 107.08step/s]


DateTime, now, H:m:s-u 22 : 20 : 15 - 292468
Steps 193.51805813649167 per sec, 11611.0834881895 per min.

                --- Global Steps 130000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2006.0, 2004.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2010.60', '2010.60']
                Agents:		[13, 14]
                Steps:		[65000, 65000]
                Mutations:	['param', 'lr_critic']
                

 20%|##        | 70000/350000 [  11:45<  42:40, 109.37step/s]


DateTime, now, H:m:s-u 22 : 21 : 2 - 702644
Steps 194.66557460197438 per sec, 11679.934476118462 per min.

                --- Global Steps 140000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2028.25, 1962.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1976.40', '2029.40']
                Agents:		[14, 15]
                Steps:		[70000, 70000]
                Mutations:	['lr_critic', 'None']
                

 21%|##1       | 75000/350000 [  12:50<  42:08, 108.77step/s]


DateTime, now, H:m:s-u 22 : 21 : 57 - 780396
Steps 193.73340236205019 per sec, 11624.004141723011 per min.

                --- Global Steps 150000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2023.0, 2013.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2017.80', '2025.20']
                Agents:		[15, 16]
                Steps:		[75000, 75000]
                Mutations:	['bs', 'param']
                

 23%|##2       | 80000/350000 [  13:40<  44:14, 101.73step/s]


DateTime, now, H:m:s-u 22 : 22 : 55 - 213045
Steps 192.3787895170188 per sec, 11542.727371021127 per min.

                --- Global Steps 160000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1952.75, 1990.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1999.00', '1969.00']
                Agents:		[16, 17]
                Steps:		[80000, 80000]
                Mutations:	['arch', 'param']
                

 24%|##4       | 85000/350000 [  14:24<  44:07, 100.09step/s]


DateTime, now, H:m:s-u 22 : 23 : 42 - 591661
Steps 193.38594032550785 per sec, 11603.156419530469 per min.

                --- Global Steps 170000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 1996.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1938.60', '1938.60']
                Agents:		[17, 18]
                Steps:		[85000, 85000]
                Mutations:	['lr_actor', 'lr_critic']
                

 26%|##5       | 90000/350000 [  15:30<  41:03, 105.55step/s]


DateTime, now, H:m:s-u 22 : 24 : 34 - 988752
Steps 193.2433084058097 per sec, 11594.598504348582 per min.

                --- Global Steps 180000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2032.25, 2028.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2021.80', '2025.00']
                Agents:		[18, 19]
                Steps:		[90000, 90000]
                Mutations:	['param', 'None']
                

 27%|##7       | 95000/350000 [  16:04<  41:10, 103.22step/s]


DateTime, now, H:m:s-u 22 : 25 : 22 - 779664
Steps 194.02423069792263 per sec, 11641.453841875358 per min.

                --- Global Steps 190000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '2034.00']
                Agents:		[19, 20]
                Steps:		[95000, 95000]
                Mutations:	['None', 'arch']
                

 29%|##8       | 100000/350000 [  17:10<  38:42, 107.65step/s]


DateTime, now, H:m:s-u 22 : 26 : 16 - 566176
Steps 193.60228615006727 per sec, 11616.137169004036 per min.

                --- Global Steps 200000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2005.125]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2010.90', '2010.90']
                Agents:		[20, 21]
                Steps:		[100000, 100000]
                Mutations:	['param', 'param']
                

 30%|###       | 105000/350000 [  17:50<  41:08, 99.26step/s] 


DateTime, now, H:m:s-u 22 : 27 : 12 - 536501
Steps 192.83463746792145 per sec, 11570.078248075288 per min.

                --- Global Steps 210000 ---
                Fitness:	['1398.00', '2034.00']
                Score:		[1437.375, 2034.0]
                5 fitness avgs:	['2034.00', '1906.80']
                10 score avgs:	['2034.00', '1556.70']
                Agents:		[21, 22]
                Steps:		[105000, 105000]
                Mutations:	['ls', 'param']
                

 31%|###1      | 110000/350000 [  19:00<  39:45, 100.61step/s]


DateTime, now, H:m:s-u 22 : 28 : 7 - 388952
Steps 192.3298132170182 per sec, 11539.788793021093 per min.

                --- Global Steps 220000 ---
                Fitness:	['2034.00', '1398.00']
                Score:		[2034.0, 1397.75]
                5 fitness avgs:	['2034.00', '1779.60']
                10 score avgs:	['2034.00', '1397.80']
                Agents:		[21, 23]
                Steps:		[110000, 110000]
                Mutations:	['param', 'ls']
                

 33%|###2      | 115000/350000 [  19:33<  37:54, 103.30step/s]


DateTime, now, H:m:s-u 22 : 28 : 51 - 648871
Steps 193.5817804759832 per sec, 11614.906828558993 per min.

                --- Global Steps 230000 ---
                Fitness:	['2034.00', '1398.00']
                Score:		[2034.0, 1398.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '2034.00']
                Agents:		[21, 24]
                Steps:		[115000, 115000]
                Mutations:	['None', 'None']
                

 34%|###4      | 120000/350000 [  20:30<  34:47, 110.17step/s]


DateTime, now, H:m:s-u 22 : 29 : 38 - 246984
Steps 194.37503658749853 per sec, 11662.50219524991 per min.

                --- Global Steps 240000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2020.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2022.80', '2022.80']
                Agents:		[24, 25]
                Steps:		[120000, 120000]
                Mutations:	['param', 'arch']
                

 36%|###5      | 125000/350000 [  21:20<  33:25, 112.18step/s]


DateTime, now, H:m:s-u 22 : 30 : 23 - 434146
Steps 195.3256827785119 per sec, 11719.540966710712 per min.

                --- Global Steps 250000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1997.25, 1979.875]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1990.70', '1990.70']
                Agents:		[25, 26]
                Steps:		[125000, 125000]
                Mutations:	['None', 'None']
                

 37%|###7      | 130000/350000 [  22:00<  31:31, 116.32step/s]


DateTime, now, H:m:s-u 22 : 31 : 7 - 132539
Steps 196.43218619309957 per sec, 11785.931171585975 per min.

                --- Global Steps 260000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1991.5, 2033.625]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1990.40', '1990.40']
                Agents:		[26, 27]
                Steps:		[130000, 130000]
                Mutations:	['arch', 'ls']
                

 39%|###8      | 135000/350000 [  22:50<  30:57, 115.77step/s]


DateTime, now, H:m:s-u 22 : 31 : 58 - 218087
Steps 196.40683640143106 per sec, 11784.410184085864 per min.

                --- Global Steps 270000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2033.70', '2033.70']
                Agents:		[27, 28]
                Steps:		[135000, 135000]
                Mutations:	['None', 'param']
                

 40%|####      | 140000/350000 [  23:40<  32:19, 108.29step/s]


DateTime, now, H:m:s-u 22 : 32 : 45 - 774449
Steps 196.87061286243744 per sec, 11812.236771746246 per min.

                --- Global Steps 280000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2019.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2022.20', '2022.20']
                Agents:		[28, 29]
                Steps:		[140000, 140000]
                Mutations:	['arch', 'None']
                

 41%|####1     | 145000/350000 [  24:30<  31:42, 107.74step/s]


DateTime, now, H:m:s-u 22 : 33 : 34 - 192833
Steps 197.18872782617163 per sec, 11831.323669570298 per min.

                --- Global Steps 290000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2019.5, 2017.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2021.00', '2022.40']
                Agents:		[29, 30]
                Steps:		[145000, 145000]
                Mutations:	['arch', 'None']
                

 43%|####2     | 150000/350000 [  25:03<  30:13, 110.27step/s]


DateTime, now, H:m:s-u 22 : 34 : 22 - 324990
Steps 197.5237817003769 per sec, 11851.426902022615 per min.

                --- Global Steps 300000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2002.125, 1971.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1972.40', '1972.40']
                Agents:		[30, 31]
                Steps:		[150000, 150000]
                Mutations:	['arch', 'bs']
                

 44%|####4     | 155000/350000 [  25:52<  29:52, 108.77step/s]


DateTime, now, H:m:s-u 22 : 35 : 11 - 74906
Steps 197.7602840458156 per sec, 11865.617042748936 per min.

                --- Global Steps 310000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1930.75, 1993.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1980.80', '1980.80']
                Agents:		[31, 32]
                Steps:		[155000, 155000]
                Mutations:	['arch', 'arch']
                

 46%|####5     | 160000/350000 [  26:41<  29:05, 108.84step/s]


DateTime, now, H:m:s-u 22 : 35 : 59 - 661417
Steps 198.00253981575847 per sec, 11880.152388945507 per min.

                --- Global Steps 320000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1978.25, 1953.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1953.40', '1953.40']
                Agents:		[32, 33]
                Steps:		[160000, 160000]
                Mutations:	['ls', 'arch']
                

 47%|####7     | 165000/350000 [  27:30<  28:24, 108.53step/s]


DateTime, now, H:m:s-u 22 : 36 : 48 - 765931
Steps 198.1689911332115 per sec, 11890.139467992689 per min.

                --- Global Steps 330000 ---
                Fitness:	['2034.00', '678.00']
                Score:		[2000.0, 679.25]
                5 fitness avgs:	['2034.00', '1762.80']
                10 score avgs:	['2000.20', '943.60']
                Agents:		[32, 34]
                Steps:		[165000, 165000]
                Mutations:	['arch', 'arch']
                

 49%|####8     | 170000/350000 [  28:22<  28:32, 105.13step/s]


DateTime, now, H:m:s-u 22 : 37 : 40 - 499505
Steps 198.02222734430228 per sec, 11881.333640658137 per min.

                --- Global Steps 340000 ---
                Fitness:	['2034.00', '678.00']
                Score:		[2025.0, 678.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2026.80', '2026.80']
                Agents:		[32, 35]
                Steps:		[170000, 170000]
                Mutations:	['lr_critic', 'None']
                

 50%|#####     | 175000/350000 [  29:12<  27:39, 105.46step/s]


DateTime, now, H:m:s-u 22 : 38 : 30 - 585843
Steps 198.06851434827624 per sec, 11884.110860896575 per min.

                --- Global Steps 350000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2033.5, 2009.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2022.80', '2042.00']
                Agents:		[35, 36]
                Steps:		[175000, 175000]
                Mutations:	['None', 'param']
                

 51%|#####1    | 180000/350000 [  30:02<  26:45, 105.87step/s]


DateTime, now, H:m:s-u 22 : 39 : 20 - 953088
Steps 198.08162816960024 per sec, 11884.897690176014 per min.

                --- Global Steps 360000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2028.75, 2020.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2022.80', '2022.80']
                Agents:		[36, 37]
                Steps:		[180000, 180000]
                Mutations:	['arch', 'arch']
                

 53%|#####2    | 185000/350000 [  30:53<  26:20, 104.42step/s]


DateTime, now, H:m:s-u 22 : 40 : 11 - 831337
Steps 198.0398550058296 per sec, 11882.391300349775 per min.

                --- Global Steps 370000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2026.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2026.20', '2026.20']
                Agents:		[37, 38]
                Steps:		[185000, 185000]
                Mutations:	['arch', 'arch']
                

 54%|#####4    | 190000/350000 [  31:43<  25:34, 104.23step/s]


DateTime, now, H:m:s-u 22 : 41 : 2 - 8315
Steps 198.07267072011527 per sec, 11884.360243206916 per min.

                --- Global Steps 380000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2027.75, 2009.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2008.20', '2008.20']
                Agents:		[38, 39]
                Steps:		[190000, 190000]
                Mutations:	['bs', 'arch']
                

 56%|#####5    | 195000/350000 [  32:50<  24:34, 105.16step/s]


DateTime, now, H:m:s-u 22 : 42 : 3 - 192877
Steps 197.00229699559765 per sec, 11820.137819735857 per min.

                --- Global Steps 390000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2001.25, 1991.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2000.00', '2000.00']
                Agents:		[39, 40]
                Steps:		[195000, 195000]
                Mutations:	['param', 'param']
                

 57%|#####7    | 200000/350000 [  33:50<  26:41, 93.64step/s] 


DateTime, now, H:m:s-u 22 : 42 : 55 - 775978
Steps 196.82565073489138 per sec, 11809.539044093483 per min.

                --- Global Steps 400000 ---
                Fitness:	['2034.00', '1398.00']
                Score:		[2024.0, 1363.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2026.00', '2026.00']
                Agents:		[39, 41]
                Steps:		[200000, 200000]
                Mutations:	['None', 'arch']
                

 59%|#####8    | 205000/350000 [  34:30<  25:11, 95.93step/s]


DateTime, now, H:m:s-u 22 : 43 : 48 - 872673
Steps 196.60947995837927 per sec, 11796.568797502758 per min.

                --- Global Steps 410000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2027.75, 2033.875]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2033.90', '2033.90']
                Agents:		[41, 42]
                Steps:		[205000, 205000]
                Mutations:	['None', 'arch']
                

 60%|######    | 210000/350000 [  35:40<  23:17, 100.20step/s]


DateTime, now, H:m:s-u 22 : 44 : 48 - 603817
Steps 195.79659380697302 per sec, 11747.795628418382 per min.

                --- Global Steps 420000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2030.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2030.90', '2030.90']
                Agents:		[42, 43]
                Steps:		[210000, 210000]
                Mutations:	['None', 'param']
                

 61%|######1   | 215000/350000 [  36:40<  25:08, 89.48step/s] 


DateTime, now, H:m:s-u 22 : 45 : 56 - 142872
Steps 194.33953545380245 per sec, 11660.372127228145 per min.

                --- Global Steps 430000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1979.125, 1982.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1992.80', '1992.80']
                Agents:		[43, 44]
                Steps:		[215000, 215000]
                Mutations:	['None', 'param']
                

 63%|######2   | 220000/350000 [  37:40<  24:24, 88.77step/s]


DateTime, now, H:m:s-u 22 : 46 : 46 - 193811
Steps 194.46024542455888 per sec, 11667.614725473532 per min.

                --- Global Steps 440000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1985.0, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1992.80', '1992.80']
                Agents:		[44, 45]
                Steps:		[220000, 220000]
                Mutations:	['bs', 'arch']
                

 64%|######4   | 225000/350000 [  38:30<  21:32, 96.71step/s]


DateTime, now, H:m:s-u 22 : 47 : 36 - 579664
Steps 194.54755591419865 per sec, 11672.85335485192 per min.

                --- Global Steps 450000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 1987.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1997.00', '1997.00']
                Agents:		[45, 46]
                Steps:		[225000, 225000]
                Mutations:	['ls', 'bs']
                

 66%|######5   | 230000/350000 [  39:20<  19:32, 102.35step/s]


DateTime, now, H:m:s-u 22 : 48 : 25 - 177871
Steps 194.77846853829922 per sec, 11686.708112297953 per min.

                --- Global Steps 460000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1936.25, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '1955.80']
                Agents:		[46, 47]
                Steps:		[230000, 230000]
                Mutations:	['ls', 'arch']
                

 67%|######7   | 235000/350000 [  39:53<  17:54, 107.06step/s]


DateTime, now, H:m:s-u 22 : 49 : 12 - 870850
Steps 195.0733339634341 per sec, 11704.400037806046 per min.

                --- Global Steps 470000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1962.20', '2034.00']
                Agents:		[47, 48]
                Steps:		[235000, 235000]
                Mutations:	['bs', 'param']
                

 69%|######8   | 240000/350000 [  41:00<  16:52, 108.63step/s]


DateTime, now, H:m:s-u 22 : 50 : 17 - 477307
Steps 194.02117535326323 per sec, 11641.270521195793 per min.

                --- Global Steps 480000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2015.5, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '2034.00']
                Agents:		[48, 49]
                Steps:		[240000, 240000]
                Mutations:	['param', 'bs']
                

 70%|#######   | 245000/350000 [  42:00<  18:43, 93.47step/s] 


DateTime, now, H:m:s-u 22 : 51 : 17 - 954130
Steps 193.33708366945618 per sec, 11600.22502016737 per min.

                --- Global Steps 490000 ---
                Fitness:	['1356.00', '2034.00']
                Score:		[1319.5, 2038.125]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2037.30', '2037.30']
                Agents:		[49, 50]
                Steps:		[245000, 245000]
                Mutations:	['param', 'None']
                

 71%|#######1  | 250000/350000 [  42:45<  17:39, 94.35step/s]


DateTime, now, H:m:s-u 22 : 52 : 3 - 129852
Steps 193.82780056015764 per sec, 11629.66803360946 per min.

                --- Global Steps 500000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2005.25, 1980.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1994.10', '1994.10']
                Agents:		[50, 51]
                Steps:		[250000, 250000]
                Mutations:	['bs', 'lr_critic']
                

 73%|#######2  | 255000/350000 [  43:31<  15:16, 103.70step/s]


DateTime, now, H:m:s-u 22 : 52 : 50 - 346756
Steps 194.15064289686387 per sec, 11649.038573811833 per min.

                --- Global Steps 510000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2027.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2029.00', '2029.00']
                Agents:		[51, 52]
                Steps:		[255000, 255000]
                Mutations:	['bs', 'arch']
                

 74%|#######4  | 260000/350000 [  44:30<  13:47, 108.71step/s]


DateTime, now, H:m:s-u 22 : 53 : 37 - 180358
Steps 194.4899630897168 per sec, 11669.39778538301 per min.

                --- Global Steps 520000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 1979.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1985.00', '1985.00']
                Agents:		[52, 53]
                Steps:		[260000, 260000]
                Mutations:	['arch', 'None']
                

 76%|#######5  | 265000/350000 [  45:20<  12:38, 112.04step/s]


DateTime, now, H:m:s-u 22 : 54 : 39 - 838813
Steps 193.69091796426062 per sec, 11621.455077855637 per min.

                --- Global Steps 530000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1966.5, 2028.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2029.80', '1980.00']
                Agents:		[53, 54]
                Steps:		[265000, 265000]
                Mutations:	['param', 'None']
                

 77%|#######7  | 270000/350000 [  46:20<  13:38, 97.70step/s] 


DateTime, now, H:m:s-u 22 : 55 : 28 - 289910
Steps 193.91192878335633 per sec, 11634.71572700138 per min.

                --- Global Steps 540000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2018.375]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1976.10', '2034.00']
                Agents:		[54, 55]
                Steps:		[270000, 270000]
                Mutations:	['ls', 'param']
                

 79%|#######8  | 275000/350000 [  47:10<  12:10, 102.65step/s]


DateTime, now, H:m:s-u 22 : 56 : 15 - 792283
Steps 194.19040578358093 per sec, 11651.424347014856 per min.

                --- Global Steps 550000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2009.75, 2031.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2032.20', '2002.30']
                Agents:		[55, 56]
                Steps:		[275000, 275000]
                Mutations:	['arch', 'bs']
                

 80%|########  | 280000/350000 [  48:00<  11:03, 105.49step/s]


DateTime, now, H:m:s-u 22 : 57 : 19 - 589818
Steps 193.3655391394758 per sec, 11601.93234836855 per min.

                --- Global Steps 560000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2020.0, 2033.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2033.80', '2033.80']
                Agents:		[56, 57]
                Steps:		[280000, 280000]
                Mutations:	['param', 'param']
                

 81%|########1 | 285000/350000 [  49:00<  11:33, 93.67step/s] 


DateTime, now, H:m:s-u 22 : 58 : 8 - 609979
Steps 193.5425087890101 per sec, 11612.550527340605 per min.

                --- Global Steps 570000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2018.25, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2033.80', '2033.80']
                Agents:		[57, 58]
                Steps:		[285000, 285000]
                Mutations:	['None', 'lr_actor']
                

 83%|########2 | 290000/350000 [  49:50<  10:00, 99.92step/s]


DateTime, now, H:m:s-u 22 : 59 : 9 - 449936
Steps 192.9519701368624 per sec, 11577.118208211745 per min.

                --- Global Steps 580000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2034.0, 2019.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2022.20', '2022.20']
                Agents:		[58, 59]
                Steps:		[290000, 290000]
                Mutations:	['param', 'arch']
                

 84%|########4 | 295000/350000 [  51:00<  09:44, 94.03step/s]


DateTime, now, H:m:s-u 23 : 0 : 15 - 819352
Steps 192.038611600044 per sec, 11522.31669600264 per min.

                --- Global Steps 590000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2024.75, 2027.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2028.60', '2028.60']
                Agents:		[59, 60]
                Steps:		[295000, 295000]
                Mutations:	['param', 'None']
                

 86%|########5 | 300000/350000 [  52:00<  09:30, 87.60step/s]


DateTime, now, H:m:s-u 23 : 1 : 14 - 288788
Steps 191.64625150836025 per sec, 11498.775090501615 per min.

                --- Global Steps 600000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2031.75, 2004.875]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2010.70', '2010.70']
                Agents:		[60, 61]
                Steps:		[300000, 300000]
                Mutations:	['param', 'param']
                

 87%|########7 | 305000/350000 [  53:00<  08:21, 89.79step/s]


DateTime, now, H:m:s-u 23 : 2 : 5 - 75362
Steps 191.7301548770532 per sec, 11503.809292623193 per min.

                --- Global Steps 610000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1939.75, 2023.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2025.80', '1958.60']
                Agents:		[61, 62]
                Steps:		[305000, 305000]
                Mutations:	['arch', 'arch']
                

 89%|########8 | 310000/350000 [  53:50<  06:53, 96.85step/s]


DateTime, now, H:m:s-u 23 : 2 : 58 - 617267
Steps 191.64805574380486 per sec, 11498.883344628292 per min.

                --- Global Steps 620000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1998.375, 1993.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2001.60', '2001.60']
                Agents:		[62, 63]
                Steps:		[310000, 310000]
                Mutations:	['param', 'ls']
                

 90%|######### | 315000/350000 [  54:50<  06:02, 96.53step/s]


DateTime, now, H:m:s-u 23 : 4 : 44 - 920054
Steps 188.54375100765293 per sec, 11312.625060459175 per min.

                --- Global Steps 630000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1996.75, 2007.25]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2012.60', '2012.60']
                Agents:		[63, 64]
                Steps:		[315000, 315000]
                Mutations:	['param', 'param']
                

 91%|#########1| 320000/350000 [  56:30<  07:25, 67.38step/s]


DateTime, now, H:m:s-u 23 : 5 : 39 - 92499
Steps 188.48076411360395 per sec, 11308.845846816235 per min.

                --- Global Steps 640000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2002.5, 2011.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1994.20', '1994.20']
                Agents:		[64, 65]
                Steps:		[320000, 320000]
                Mutations:	['None', 'arch']
                

 93%|#########2| 325000/350000 [  57:14<  05:14, 79.43step/s]


DateTime, now, H:m:s-u 23 : 6 : 33 - 489929
Steps 188.4074684580579 per sec, 11304.448107483475 per min.

                --- Global Steps 650000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1996.5, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2034.00', '2034.00']
                Agents:		[65, 66]
                Steps:		[325000, 325000]
                Mutations:	['None', 'None']
                

 94%|#########4| 330000/350000 [  58:12<  03:56, 84.53step/s]


DateTime, now, H:m:s-u 23 : 7 : 33 - 466393
Steps 188.03708826412557 per sec, 11282.225295847535 per min.

                --- Global Steps 660000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1977.0, 2040.625]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2039.30', '1988.40']
                Agents:		[66, 67]
                Steps:		[330000, 330000]
                Mutations:	['bs', 'arch']
                

 96%|#########5| 335000/350000 [  59:20<  02:45, 90.70step/s]


DateTime, now, H:m:s-u 23 : 8 : 26 - 708893
Steps 188.03384309181578 per sec, 11282.030585508946 per min.

                --- Global Steps 670000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2027.125, 2024.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1980.80', '2033.80']
                Agents:		[67, 68]
                Steps:		[335000, 335000]
                Mutations:	['lr_actor', 'bs']
                

 97%|#########7| 340000/350000 [1:00:20<  01:45, 94.74step/s]


DateTime, now, H:m:s-u 23 : 9 : 48 - 488984
Steps 186.55854196550246 per sec, 11193.512517930149 per min.

                --- Global Steps 680000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2029.5, 1958.5]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1970.40', '1970.40']
                Agents:		[68, 69]
                Steps:		[340000, 340000]
                Mutations:	['param', 'None']
                

 99%|#########8| 345000/350000 [1:01:40<  01:04, 77.70step/s]


DateTime, now, H:m:s-u 23 : 10 : 43 - 956180
Steps 186.46453192668133 per sec, 11187.87191560088 per min.

                --- Global Steps 690000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[1020.125, 2033.75]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['2033.80', '1222.90']
                Agents:		[69, 70]
                Steps:		[345000, 345000]
                Mutations:	['None', 'arch']
                

100%|##########| 350000/350000 [1:02:33<  00:00, 93.23step/s]


DateTime, now, H:m:s-u 23 : 11 : 37 - 486969
Steps 186.46943508243103 per sec, 11188.166104945863 per min.

                --- Global Steps 700000 ---
                Fitness:	['2034.00', '2034.00']
                Score:		[2017.25, 2034.0]
                5 fitness avgs:	['2034.00', '2034.00']
                10 score avgs:	['1830.60', '1830.60']
                Agents:		[70, 71]
                Steps:		[350000, 350000]
                Mutations:	['None', 'bs']
                


