# REACTOR

Proviene del caso de la notebook

## 1. Imports

In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import random
import os

from typing import Dict, Any, List
import torch
import torch.nn as nn
from torchinfo import summary

import sys
sys.path.append('..')  # Subir un nivel para acceder a las carpetas

# Imports del proyecto
from Environment.simulation_env import SimulationPIDEnv # Ambiente de simulaci√≥n PID
from Simulations_Env.reactor_CSTR import CSTRSimulator # Simulador de tanque simple
from Agent.DQN.algorithm_DQN import DQNAgent  # Agente DQN para control
from Agent.Actor_Critic.algorithm_ActorCritic import ActorCriticAgent
from Environment.multi_agent_env_modular import MultiAgentPIDEnv
from Entrenamiento.controller_agent import ControllerAgent

# Configuraci√≥n de matplotlib
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

print("‚úÖ Imports completados")
print(f"PyTorch version: {torch.__version__}")
print(f"Device disponible: {'cuda' if torch.cuda.is_available() else 'cpu'}")

‚úÖ Imports completados
PyTorch version: 2.8.0+cu126
Device disponible: cuda


In [3]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Wed Nov 19 19:53:56 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   33C    P0             46W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
from psutil import virtual_memory
# Total system memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this cell uses to tell a high-RAM runtime from a standard one.
if ram_gb >= 20:
    print('You are using a high-RAM runtime!')
else:
    for hint_line in (
        'To enable a high-RAM runtime, select the Runtime > "Change runtime type"',
        'menu, and then select High-RAM in the Runtime shape dropdown. Then, ',
        're-execute this cell.',
    ):
        print(hint_line)

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


In [5]:
# Resolve the compute device first so the cell also runs on CPU-only runtimes
# (an unconditional `.cuda()` call raises when no GPU is present).
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if DEVICE.type == 'cuda':
    # Allocate a tiny tensor on the GPU so CUDA is initialised up front.
    torch.zeros(1, device=DEVICE)
print(f"Running on {DEVICE}")

# Seed every RNG the notebook uses and ask cuDNN for deterministic behaviour.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

print("Cuda Available:", torch.cuda.is_available())

Running on cuda:0
Cuda Available: True


## W&B

In [6]:
!pip install wandb



In [None]:
import wandb
import os

# Set the notebook name and silence W&B console output before authenticating.
os.environ.update({
    "WANDB_NOTEBOOK_NAME": "multi_agente_reactor",
    "WANDB_SILENT": "true",
})

# Authenticate against W&B.
wandb.login()

# Unregister the IPython cell hooks installed by W&B, which cause AlreadyJoinedError.
try:
    from IPython import get_ipython
    ipython = get_ipython()
    if ipython is not None:
        callbacks = ipython.events.callbacks
        for hook_name in ('pre_run_cell', 'post_run_cell'):
            if hook_name not in callbacks:
                continue
            # Keep every callback whose repr does not mention wandb.
            callbacks[hook_name] = [
                hook for hook in callbacks[hook_name]
                if 'wandb' not in str(hook).lower()
            ]
        print("‚úÖ Hooks de Jupyter de WandB deshabilitados")
except Exception as e:
    # Best effort: failing to remove the hooks must not stop the notebook.
    print(f"‚ö†Ô∏è No se pudieron deshabilitar hooks: {e}")

In [8]:
WANDB_TEAM_NAME = "valeriaeskenazi-universidad-ort-uruguay"
WANDB_PROJECT = "Tesis_maestr√≠a"


def _fixed(value):
    """Return a sweep parameter spec pinned to a single value."""
    return {"value": value}


def _choices(*options):
    """Return a sweep parameter spec that samples from the given options."""
    return {"values": list(options)}


# Random-search sweep over controller, orchestrator, training and reactor settings.
sweep_config = {
    "name": "multi_agent_reactor_cstr",
    "method": "random",

    # Objective: maximise the orchestrator's reward.
    "metric": {"name": "global_reward", "goal": "maximize"},

    "parameters": {
        "mode": _fixed("indirect"),

        # ---------- Controllers (DQN) ----------
        "controller_hidden_dims": _choices([64, 64], [128, 128, 64]),
        "controller_lr": _choices(0.0001, 0.001),
        "controller_gamma": _choices(0.99, 0.999),

        # ---------- Orchestrator (Actor-Critic) ----------
        "orch_hidden_dims": _choices([64, 64], [128, 128, 64]),
        "orch_lr_actor": _choices(0.00001, 0.0001),
        "orch_lr_critic": _choices(0.0001, 0.001),
        "orch_gamma": _choices(0.99, 0.999),

        # ---------- Training ----------
        "n_episodes": _choices(5, 10),
        "j_max_retries": _choices(3, 5),
        "r_orchestrator_iterations": _choices(10, 20),

        # ---------- Reactor ----------
        "n_manipulable_vars": _fixed(2),  # flow and temperature
        "n_target_vars": _fixed(1),       # CB
        "dead_band": _choices(0.01, 0.02),
    },
}

# Register the sweep under the team entity explicitly instead of relying on the
# logged-in user's default entity.
sweep_id = wandb.sweep(sweep_config, entity=WANDB_TEAM_NAME, project=WANDB_PROJECT)

Create sweep with ID: p6j3rtu4
Sweep URL: https://wandb.ai/valeriaeskenazi-universidad-ort-uruguay/Tesis_maestr%C3%ADa/sweeps/p6j3rtu4


In [9]:
def wandb_log_callback(iteration, global_reward, best_sp, pids, errors, stats=None):
    """Log one training iteration to the active W&B run.

    Args:
        iteration: Iteration counter, logged as ``iteration``.
        global_reward: Reward value, logged as ``global_reward``.
        best_sp: Indexable of setpoints; items 0 and 1 are logged as
            ``sp_tc`` and ``sp_f``.
        pids: Indexable with one ``(Kp, Ki, Kd)`` entry per controller; the
            entries for controllers 0 and 1 are logged.
        errors: Per-controller errors; items 0 and 1 are logged individually
            and the mean of all items is logged as ``avg_error``.
        stats: Optional mapping. ``current_pvs`` (if present) supplies
            ``pv_tc``/``pv_f``; ``target_pv`` (if present) supplies
            ``cb_actual``, with ``cb_target`` taken from ``target_sp`` or
            defaulting to 0.2.
    """
    payload = {"iteration": iteration, "global_reward": global_reward}

    # Setpoints (plotted against the PVs logged below).
    payload["sp_tc"] = best_sp[0]
    payload["sp_f"] = best_sp[1]

    # PID gains of the two controllers.
    for idx in (0, 1):
        gains = pids[idx]
        for position, gain_name in enumerate(("Kp", "Ki", "Kd")):
            payload[f"controller_{idx}_{gain_name}"] = gains[position]

    # Per-controller errors and their mean.
    for idx in (0, 1):
        payload[f"controller_{idx}_error"] = errors[idx]
    payload["avg_error"] = sum(errors) / len(errors)

    # Optional series used for the charts.
    if stats:
        if 'current_pvs' in stats:
            current_pvs = stats['current_pvs']
            payload["pv_tc"] = current_pvs[0]
            payload["pv_f"] = current_pvs[1]
        if 'target_pv' in stats:
            payload["cb_actual"] = stats['target_pv']
            payload["cb_target"] = stats.get('target_sp', 0.2)

    wandb.log(payload)

In [None]:
# Desired CB value used for the orchestrator target, the error trace and the plot,
# plus the fixed band shaded around it in the CB panel.
_CB_TARGET = 0.2
_CB_DEADBAND_LIMITS = (0.19, 0.21)


def _run_evaluation_rollout(multi_env, best_pids):
    """Replay one episode with the trained agents and record its traces.

    The external process and the base environment are reset, the tuned PID
    gains are loaded into controllers 0 and 1, and then, for up to
    ``multi_env.max_episode_steps`` steps, the orchestrator picks new setpoints
    from the current CB reading and both DQN agents pick an action with
    ``training=False``.

    Args:
        multi_env: The trained multi-agent environment.
        best_pids: Indexable with one ``(Kp, Ki, Kd)`` entry per controller.

    Returns:
        dict mapping trace name to a list with one value per executed step:
        ``pvs_tc``, ``pvs_f``, ``cb_values``, ``sps_tc``, ``sps_f``,
        ``errors_tc``, ``errors_f``, ``errors_orch``, ``actions_tc``,
        ``actions_f``.
    """
    base_env = multi_env.base_env

    # Start from a clean process / environment state.
    base_env.reset_process()
    base_env.reset()

    # Load the tuned gains into the two PID controllers.
    for i in range(2):
        base_env.pid_controllers[i].update_gains(
            kp=best_pids[i][0],
            ki=best_pids[i][1],
            kd=best_pids[i][2]
        )

    traces = {
        key: [] for key in (
            'pvs_tc', 'pvs_f', 'cb_values', 'sps_tc', 'sps_f',
            'errors_tc', 'errors_f', 'errors_orch', 'actions_tc', 'actions_f',
        )
    }

    for _ in range(multi_env.max_episode_steps):
        # CB is read from index 0 of the external process state.
        cb_current = base_env.external_process.get_state()[0]

        # The orchestrator chooses new setpoints from the current CB reading.
        new_setpoints = multi_env.orchestrator.decide_setpoints(
            pv_targets=[cb_current],
            sp_targets=[_CB_TARGET]
        )
        sp_tc, sp_f = new_setpoints[0], new_setpoints[1]
        base_env.set_setpoint(sp_tc, var_idx=0)  # Tc
        base_env.set_setpoint(sp_f, var_idx=1)   # F

        # The observation is split into one 6-value slice per controller.
        state = base_env._get_observation()
        action_tc = multi_env.controller_agents[0].dqn_agent.select_action(state[0:6], training=False)
        action_f = multi_env.controller_agents[1].dqn_agent.select_action(state[6:12], training=False)
        traces['actions_tc'].append(action_tc)
        traces['actions_f'].append(action_f)

        # NOTE(review): only the third element of the step result ends the loop;
        # the fourth element is ignored — confirm that is intended.
        _, _, done, _, info = base_env.step([action_tc, action_f])

        # Record values observed AFTER the step.
        pv_tc, pv_f = info['current_pvs'][0], info['current_pvs'][1]
        cb_current = base_env.external_process.get_state()[0]

        traces['pvs_tc'].append(pv_tc)
        traces['pvs_f'].append(pv_f)
        traces['sps_tc'].append(sp_tc)
        traces['sps_f'].append(sp_f)
        traces['cb_values'].append(cb_current)
        traces['errors_tc'].append(sp_tc - pv_tc)
        traces['errors_f'].append(sp_f - pv_f)
        traces['errors_orch'].append(_CB_TARGET - cb_current)

        if done:
            break

    return traces


def _build_evaluation_figure(traces):
    """Build the 4-row evaluation figure from the traces of a rollout.

    Rows: PV vs SP per controller, CB vs target, per-controller error, and
    per-controller action histograms. The caller owns (and must close) the
    returned figure.
    """
    fig = plt.figure(figsize=(16, 20))
    gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.3)

    # Row 1: PV vs SP (setpoints change every step).
    tracking_panels = (
        (gs[0, 0], 'Tc', traces['pvs_tc'], traces['sps_tc'], 'Temperatura (K)'),
        (gs[0, 1], 'F', traces['pvs_f'], traces['sps_f'], 'Flujo (m¬≥/s)'),
    )
    for cell, tag, pvs, sps, ylabel in tracking_panels:
        ax = fig.add_subplot(cell)
        ax.plot(pvs, label=f'PV {tag}', linewidth=2)
        ax.plot(sps, '--', label=f'SP {tag}', linewidth=2, alpha=0.7)
        ax.set_xlabel('Pasos de simulaci√≥n')
        ax.set_ylabel(ylabel)
        ax.set_title(f'Controlador {tag}: PV vs SP')
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Row 2: CB against its target, with the shaded band.
    cb_values = traces['cb_values']
    ax = fig.add_subplot(gs[1, :])
    ax.plot(cb_values, label='CB actual', linewidth=2)
    ax.axhline(_CB_TARGET, color='r', linestyle='--', label='Target')
    ax.fill_between(range(len(cb_values)), _CB_DEADBAND_LIMITS[0], _CB_DEADBAND_LIMITS[1],
                    alpha=0.2, color='orange', label='Deadband')
    ax.set_xlabel('Pasos de simulaci√≥n')
    ax.set_ylabel('Concentraci√≥n CB')
    ax.set_title('Orquestador: CB vs Target')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Row 3: controller errors.
    error_panels = (
        (gs[2, 0], 'Tc', traces['errors_tc']),
        (gs[2, 1], 'F', traces['errors_f']),
    )
    for cell, tag, errors in error_panels:
        ax = fig.add_subplot(cell)
        ax.plot(errors, linewidth=2, color='red')
        ax.set_xlabel('Pasos de simulaci√≥n')
        ax.set_ylabel(f'Error {tag}')
        ax.set_title(f'Error Controlador {tag}')
        ax.axhline(0, color='black', linestyle='--', alpha=0.3)
        ax.grid(True, alpha=0.3)

    # Row 4: histograms of the discrete actions (7 bins centred on 0..6).
    action_panels = (
        (gs[3, 0], 'Tc', traces['actions_tc']),
        (gs[3, 1], 'F', traces['actions_f']),
    )
    for cell, tag, actions in action_panels:
        ax = fig.add_subplot(cell)
        ax.hist(actions, bins=7, range=(-0.5, 6.5), edgecolor='black')
        ax.set_xlabel('Acci√≥n')
        ax.set_ylabel('Frecuencia')
        ax.set_title(f'Distribuci√≥n Acciones DQN - {tag}')
        ax.set_xticks(range(7))
        ax.grid(True, alpha=0.3)

    # Mean absolute CB error as a footer.
    fig.text(0.5, 0.02, f'Error promedio orquestador (CB): {np.mean(np.abs(traces["errors_orch"])):.4f}',
             ha='center', fontsize=12, bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    return fig


def sweep_run():
    """Execute one run of the W&B sweep.

    Initialises a W&B run, seeds the RNGs, builds the multi-agent environment
    from ``wandb.config``, trains it, evaluates the best PIDs/setpoints, uploads
    an evaluation figure and logs the final metrics.

    Any exception is caught, printed with its traceback and logged to the run
    (best effort); it is not re-raised. The run is always finished, with exit
    code 0 on success and 1 when an exception occurred, so failed runs are not
    reported as successful.
    """
    run = None
    exit_code = 0
    try:
        # Thread start method and disabled stats/meta collection, kept to avoid
        # threading errors when wandb runs inside the notebook.
        run = wandb.init(
            reinit=True,
            settings=wandb.Settings(
                start_method="thread",
                _disable_stats=True,
                _disable_meta=True,
            )
        )

        os.makedirs('results', exist_ok=True)
        os.makedirs('models', exist_ok=True)

        # ---------- Reproducibility ----------
        SEED = 42
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(SEED)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False

        wandb.config.update({"seed": SEED}, allow_val_change=True)

        # ---------- Read sweep configuration ----------
        config = wandb.config

        # ---------- Multi-agent environment configuration ----------
        # NOTE(review): `config.orch_hidden_dims` is swept but never forwarded here;
        # the recorded run output shows the orchestrator built with the controller's
        # hidden layers. Confirm which key MultiAgentPIDEnv reads for it.
        # NOTE(review): 'device' is fixed to 'cpu' even when CUDA is available —
        # confirm this is intentional.
        env_config = {
            'mode': 'indirect',
            'n_manipulable_vars': 2,
            'n_variables': 2,
            'n_target_vars': 1,
            'target_ranges': [(0.0, 1.0)],
            'target_setpoints': [_CB_TARGET],
            'sp_ranges': [(290.0, 450.0), (99.0, 105.0)],
            'n_episodes': config.n_episodes,
            'j_max_retries': config.j_max_retries,
            'r_orchestrator_iterations': config.r_orchestrator_iterations,
            'upper_range': [450.0, 105.0],
            'lower_range': [290.0, 99.0],
            'setpoint': [370.0, 102.0],
            'dead_band': [config.dead_band, config.dead_band],
            'dt': 1.0,
            'max_episode_steps': 200,
            'agent_lr': config.controller_lr,
            'agent_gamma': config.controller_gamma,
            'epsilon_start': 1.0,
            'epsilon_min': 0.01,
            'epsilon_decay': 0.995,
            'hidden_dims': tuple(config.controller_hidden_dims),
            'initial_pid': (1.0, 0.1, 0.05),
            'orch_lr_actor': config.orch_lr_actor,
            'orch_lr_critic': config.orch_lr_critic,
            'orch_gamma': config.orch_gamma,
            'device': 'cpu',
            'wandb_log_callback': wandb_log_callback
        }

        # ---------- Create environment and attach the reactor simulator ----------
        multi_env = MultiAgentPIDEnv(env_config)
        reactor = CSTRSimulator(dt=1.0, control_limits=((290, 450), (99, 105)))
        multi_env.base_env.connect_external_process(reactor)

        # ---------- Train ----------
        best_pids, best_setpoints = multi_env.train()

        # ---------- Evaluate ----------
        global_reward = multi_env._execute_with_pids(best_pids, best_setpoints)

        # ---------- Evaluation rollout with the orchestrator active ----------
        traces = _run_evaluation_rollout(multi_env, best_pids)

        # Debug summary of the collected series.
        print("üìä Datos recolectados:")
        summary_rows = (
            ("PVs TC", traces['pvs_tc']),
            ("SPs TC", traces['sps_tc']),
            ("PVs F", traces['pvs_f']),
            ("SPs F", traces['sps_f']),
            ("CB", traces['cb_values']),
        )
        for label, series in summary_rows:
            low = min(series) if series else 'N/A'
            high = max(series) if series else 'N/A'
            print(f"   {label}: {len(series)} puntos, rango: [{low}, {high}]")

        # Upload the figure; always release it, even if the upload fails.
        fig = _build_evaluation_figure(traces)
        try:
            wandb.log({"evaluation_plots": wandb.Image(fig)})
        finally:
            plt.close(fig)

        # ---------- Final metrics ----------
        stats = multi_env.get_statistics()
        wandb.log({
            'global_reward': global_reward,
            'best_sp_tc': best_setpoints[0],
            'best_sp_f': best_setpoints[1],
            'controller_0_Kp': best_pids[0][0],
            'controller_0_Ki': best_pids[0][1],
            'controller_0_Kd': best_pids[0][2],
            'controller_1_Kp': best_pids[1][0],
            'controller_1_Ki': best_pids[1][1],
            'controller_1_Kd': best_pids[1][2],
            'success_rate': stats['pid_trainer']['success_rate'],
            'avg_best_error': stats['pid_trainer']['avg_best_error']
        })

        print(f"\n‚úÖ Run completado: {wandb.run.name}")
        print(f"   Global reward: {global_reward:.2f}")
        print(f"   Best SP: Tc={best_setpoints[0]:.2f}, F={best_setpoints[1]:.2f}")

    except Exception as e:
        # Mark the run as failed so it is not reported as a successful trial.
        exit_code = 1
        print(f"‚ùå Error durante el run: {str(e)}")
        import traceback
        traceback.print_exc()
        if run is not None:
            try:
                wandb.log({"error": str(e)})
            except Exception:
                # Best effort: the original error has already been printed.
                pass

    finally:
        # Always close the W&B run, reporting the real outcome.
        if run is not None:
            try:
                wandb.finish(quiet=True, exit_code=exit_code)
            except Exception as e:
                print(f"‚ö†Ô∏è Warning al cerrar WandB (ignorado): {e}")

In [11]:
# Run two sweep trials locally with `sweep_run`. Entity and project are passed
# explicitly so the sweep ID resolves regardless of session defaults.
wandb.agent(sweep_id, function=sweep_run, entity=WANDB_TEAM_NAME, project=WANDB_PROJECT, count=2)

[34m[1mwandb[0m: Agent Starting Run: 0l76udqq with config:
[34m[1mwandb[0m: 	controller_gamma: 0.999
[34m[1mwandb[0m: 	controller_hidden_dims: [128, 128, 64]
[34m[1mwandb[0m: 	controller_lr: 0.0001
[34m[1mwandb[0m: 	dead_band: 0.02
[34m[1mwandb[0m: 	j_max_retries: 3
[34m[1mwandb[0m: 	mode: indirect
[34m[1mwandb[0m: 	n_episodes: 10
[34m[1mwandb[0m: 	n_manipulable_vars: 2
[34m[1mwandb[0m: 	n_target_vars: 1
[34m[1mwandb[0m: 	orch_gamma: 0.99
[34m[1mwandb[0m: 	orch_hidden_dims: [128, 128, 64]
[34m[1mwandb[0m: 	orch_lr_actor: 1e-05
[34m[1mwandb[0m: 	orch_lr_critic: 0.0001
[34m[1mwandb[0m: 	r_orchestrator_iterations: 20


  gym.logger.warn(
  gym.logger.warn(


Configurado como MULTI-AGENT (2 variables)
‚úÖ Modo: PID Tuning (Simulaci√≥n)
   N Variables: 2
   Acciones por variable: 7
   Espacio: MultiDiscrete([7, 7, ..., 7]) x2
   PID inicial: (np.float32(1.0), np.float32(0.1), np.float32(0.05))
   PIDControllers: 2 activos
Actor-Critic Agent creado
   Estado: 2 dims
   Acciones: 2 dims (continuas)
   Hidden layers: (128, 128, 64)
   LR Actor: 1e-05
   LR Critic: 0.0001
   Gamma: 0.99
   Device: cpu
‚úÖ OrchestratorAgent creado
   Variables manipulables: 2
   Rangos SP: [(290.0, 450.0), (99.0, 105.0)]
‚úÖ DQN Agent creado
   Estado: 6 dims
   Acciones: 7 (DeltaPIDActionSpace)
   Hidden layers: (128, 128, 64)
   Learning rate: 0.0001
   Gamma: 0.999
   Epsilon: 1.0 ‚Üí 0.01 (decay: 0.995)
   Device: cpu
‚úÖ DQN Agent creado
   Estado: 6 dims
   Acciones: 7 (DeltaPIDActionSpace)
   Hidden layers: (128, 128, 64)
   Learning rate: 0.0001
   Gamma: 0.999
   Epsilon: 1.0 ‚Üí 0.01 (decay: 0.995)
   Device: cpu
MultiAgentPIDEnv inicializado
Arquitectu

0,1
avg_best_error,‚ñÅ
avg_error,‚ñà‚ñá‚ñÜ‚ñÖ‚ñà‚ñà‚ñÑ‚ñÅ‚ñá‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñà‚ñÉ‚ñá‚ñá‚ñÜ
best_sp_f,‚ñÅ
best_sp_tc,‚ñÅ
cb_actual,‚ñÇ‚ñÜ‚ñÑ‚ñÉ‚ñÜ‚ñÜ‚ñÜ‚ñÅ‚ñÜ‚ñÜ‚ñÇ‚ñÖ‚ñÑ‚ñà‚ñÇ‚ñá‚ñà‚ñá‚ñÑ‚ñá
cb_target,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
controller_0_Kd,‚ñÅ‚ñÉ‚ñÉ‚ñÅ‚ñÅ‚ñÜ‚ñÉ‚ñà‚ñÖ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÖ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñà
controller_0_Ki,‚ñÖ‚ñÉ‚ñà‚ñÖ‚ñÖ‚ñá‚ñÅ‚ñÉ‚ñÉ‚ñÉ‚ñÖ‚ñÅ‚ñÖ‚ñà‚ñÖ‚ñÖ‚ñÖ‚ñÉ‚ñÖ‚ñÖ‚ñÉ
controller_0_Kp,‚ñÜ‚ñà‚ñÇ‚ñÜ‚ñÜ‚ñÑ‚ñÜ‚ñÑ‚ñà‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÅ‚ñÜ‚ñà‚ñÑ‚ñÑ‚ñÑ
controller_0_error,‚ñÜ‚ñà‚ñÉ‚ñÇ‚ñÑ‚ñà‚ñÉ‚ñÅ‚ñÜ‚ñÖ‚ñÜ‚ñÉ‚ñÇ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÇ‚ñà‚ñÜ

0,1
avg_best_error,37.7103
avg_error,37.71055
best_sp_f,102.21175
best_sp_tc,403.78528
cb_actual,0.66832
cb_target,0.2
controller_0_Kd,0.0
controller_0_Ki,0.08
controller_0_Kp,0.8
controller_0_error,37.71223


[34m[1mwandb[0m: Agent Starting Run: zemuurg6 with config:
[34m[1mwandb[0m: 	controller_gamma: 0.99
[34m[1mwandb[0m: 	controller_hidden_dims: [128, 128, 64]
[34m[1mwandb[0m: 	controller_lr: 0.001
[34m[1mwandb[0m: 	dead_band: 0.01
[34m[1mwandb[0m: 	j_max_retries: 3
[34m[1mwandb[0m: 	mode: indirect
[34m[1mwandb[0m: 	n_episodes: 5
[34m[1mwandb[0m: 	n_manipulable_vars: 2
[34m[1mwandb[0m: 	n_target_vars: 1
[34m[1mwandb[0m: 	orch_gamma: 0.999
[34m[1mwandb[0m: 	orch_hidden_dims: [64, 64]
[34m[1mwandb[0m: 	orch_lr_actor: 0.0001
[34m[1mwandb[0m: 	orch_lr_critic: 0.001
[34m[1mwandb[0m: 	r_orchestrator_iterations: 10


  gym.logger.warn(
  gym.logger.warn(


Configurado como MULTI-AGENT (2 variables)
‚úÖ Modo: PID Tuning (Simulaci√≥n)
   N Variables: 2
   Acciones por variable: 7
   Espacio: MultiDiscrete([7, 7, ..., 7]) x2
   PID inicial: (np.float32(1.0), np.float32(0.1), np.float32(0.05))
   PIDControllers: 2 activos
Actor-Critic Agent creado
   Estado: 2 dims
   Acciones: 2 dims (continuas)
   Hidden layers: (128, 128, 64)
   LR Actor: 0.0001
   LR Critic: 0.001
   Gamma: 0.999
   Device: cpu
‚úÖ OrchestratorAgent creado
   Variables manipulables: 2
   Rangos SP: [(290.0, 450.0), (99.0, 105.0)]
‚úÖ DQN Agent creado
   Estado: 6 dims
   Acciones: 7 (DeltaPIDActionSpace)
   Hidden layers: (128, 128, 64)
   Learning rate: 0.001
   Gamma: 0.99
   Epsilon: 1.0 ‚Üí 0.01 (decay: 0.995)
   Device: cpu
‚úÖ DQN Agent creado
   Estado: 6 dims
   Acciones: 7 (DeltaPIDActionSpace)
   Hidden layers: (128, 128, 64)
   Learning rate: 0.001
   Gamma: 0.99
   Epsilon: 1.0 ‚Üí 0.01 (decay: 0.995)
   Device: cpu
MultiAgentPIDEnv inicializado
Arquitectura:

0,1
avg_best_error,‚ñÅ
avg_error,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñà‚ñÇ‚ñÇ
best_sp_f,‚ñÅ
best_sp_tc,‚ñÅ
cb_actual,‚ñÅ‚ñá‚ñÖ‚ñÑ‚ñà‚ñá‚ñà‚ñÅ‚ñá‚ñà
cb_target,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
controller_0_Kd,‚ñÖ‚ñà‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
controller_0_Ki,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñà‚ñÖ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
controller_0_Kp,‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñà‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
controller_0_error,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñà‚ñÇ‚ñÇ

0,1
avg_best_error,37.71516
avg_error,37.71314
best_sp_f,102.34326
best_sp_tc,401.5556
cb_actual,0.66828
cb_target,0.2
controller_0_Kd,4e-05
controller_0_Ki,2e-05
controller_0_Kp,0.02162
controller_0_error,37.71368
