# TANQUE SIMPLE

En esta notebook estudio el comportamiento en el caso mas simple de la industria, el llenado de un tanque mediante una entrada y una salida.

## Balance de masa:
A=E-S+G-C

En este caso, al tratarse de un caso simple, la tasa de evaporacion de egua se desprecia, por lo que no hay ni consumo (C) ni generacion (G), resultando en :

A= E-S

Siendo la Entrada y Salida en caudales, puede re arreglarse la ecuacion para epresarla en aaltura, que es lo que se busca controlar: 

Area * dh/dt = Qin - Qout

## 1. Imports

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import torch

from typing import Dict, Any, List
import torch
import torch.nn as nn
from torchinfo import summary

import sys
sys.path.append('..')  # Subir un nivel para acceder a las carpetas

# Imports del proyecto
from Environment.simulation_env import SimulationPIDEnv # Ambiente de simulaci√≥n PID
from Simuladores.tanque_simple import TankSimulator # Simulador de tanque simple
from Agentes.DQN.algorithm_DQN import DQNAgent  # Agente DQN

# Configuraci√≥n de matplotlib
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

print("‚úÖ Imports completados")
print(f"PyTorch version: {torch.__version__}")
print(f"Device disponible: {'cuda' if torch.cuda.is_available() else 'cpu'}")

‚úÖ Imports completados
PyTorch version: 2.2.2
Device disponible: cpu


In [6]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

zsh:1: command not found: nvidia-smi


In [7]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 8.6 gigabytes of available RAM

To enable a high-RAM runtime, select the Runtime > "Change runtime type"
menu, and then select High-RAM in the Runtime shape dropdown. Then, 
re-execute this cell.


In [8]:
torch.zeros(1).cuda()

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Running on {DEVICE}")

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True

print("Cuda Available:", torch.cuda.is_available())

AssertionError: Torch not compiled with CUDA enabled

## 2. CONFIGURACI√ìN DE HIPERPAR√ÅMETROS

In [None]:
# Configuraci√≥n del experimento
config = {
    # Identificaci√≥n
    'experiment_name': 'dqn_tank_control',
    'run_name': 'baseline_v1',
    
    # Ambiente
    'env': {
        'upper_range': 10.0,
        'lower_range': 0.0,
        'setpoint': 5.0,
        'dead_band': 0.2,
        'max_episode_steps': 200,
        'dt': 1.0
    },
    
    # Simulador de tanque
    'tank': {
        'area': 1.0,
        'cv': 0.1,
        'max_height': 10.0,
        'max_flow_in': 0.5,
        'dt': 1.0
    },
    
    # Agente DQN
    'agent': {
        'state_dim': 6,
        'action_dim': 7,
        'hidden_dims': (128, 128, 64),
        'lr': 0.001,
        'gamma': 0.99,
        'epsilon_start': 1.0,
        'epsilon_min': 0.01,
        'epsilon_decay': 0.995,
        'memory_size': 10000,
        'batch_size': 64,
        'target_update_freq': 100,
        'device': 'cpu'
    },
    
    # Entrenamiento
    'training': {
        'n_episodes': 100,
        'log_interval': 10,      # Logear cada N episodios
        'save_interval': 50,     # Guardar modelo cada N episodios
        'eval_interval': 25,     # Evaluar cada N episodios
        'n_eval_episodes': 5     # Episodios para evaluaci√≥n
    }
}

print("‚úÖ Configuraci√≥n definida")
print(f"\nExperimento: {config['experiment_name']}")
print(f"Run: {config['run_name']}")
print(f"Episodios de entrenamiento: {config['training']['n_episodes']}")
print(f"Hidden layers: {config['agent']['hidden_dims']}")

## 3. INICIALIZAR WEIGHTS & BIASES

In [None]:
# Inicializar W&B 
wandb.init(
    project=config['experiment_name'],
    name=config['run_name'],
    config=config,
    tags=['dqn', 'tank-control', 'pid-tuning']
)

print("‚úÖ Weights & Biases inicializado")
print(f"Dashboard: {wandb.run.get_url()}")

## 4. Ambiente y Simulador de Tanque

In [None]:
# Crear ambiente de simulaci√≥n
env = SimulationPIDEnv(
    config=config['env'],
    control_mode='pid_tuning'
)

print("‚úÖ Ambiente creado")

# Crear simulador de tanque
tank = TankSimulator(
    area=config['tank']['area'],
    cv=config['tank']['cv'],
    max_height=config['tank']['max_height'],
    max_flow_in=config['tank']['max_flow_in'],
    dt=config['tank']['dt']
)

print("‚úÖ Simulador de tanque creado")


## 5. AGENTE DQN


In [None]:

# Crear agente DQN
agent = DQNAgent(
    state_dim=config['agent']['state_dim'],
    action_dim=config['agent']['action_dim'],
    hidden_dims=config['agent']['hidden_dims'],
    lr=config['agent']['lr'],
    gamma=config['agent']['gamma'],
    epsilon_start=config['agent']['epsilon_start'],
    epsilon_min=config['agent']['epsilon_min'],
    epsilon_decay=config['agent']['epsilon_decay'],
    memory_size=config['agent']['memory_size'],
    batch_size=config['agent']['batch_size'],
    target_update_freq=config['agent']['target_update_freq'],
    device=config['agent']['device']
)

print("‚úÖ Agente DQN creado")


In [None]:
# Mostrar arquitectura de la red
print("\nüìê Arquitectura de la Red Q:")
print(agent.q_network)

print(f"\nüìä Par√°metros totales: {sum(p.numel() for p in agent.q_network.parameters()):,}")

In [None]:
# Mostrar estad√≠sticas iniciales del agente
stats = agent.get_stats()

print("\nüìà Estad√≠sticas del Agente:")
for key, value in stats.items():
    print(f"  {key}: {value}")

## Modelos

In [None]:
class DQN_Network_V1(nn.Module):    
    def __init__(self, state_dim=6, n_actions=64, hidden_size=128):
        super(DQN_Network_V1, self).__init__()
        
        # Arquitectura de la red - 
        self.fc1 = nn.Linear(state_dim, hidden_size)      # 6 ‚Üí 128
        self.fc2 = nn.Linear(hidden_size, hidden_size)    # 128 ‚Üí 128  
        self.fc3 = nn.Linear(hidden_size, hidden_size//2) # 128 ‚Üí 64
        self.fc4 = nn.Linear(hidden_size//2, n_actions)   # 64 ‚Üí n_actions
        
        # Activaci√≥n
        self.relu = nn.ReLU()
        
        # Inicializar pesos
        self._init_weights()
    
    def _init_weights(self):
        """Inicializar pesos de manera inteligente"""
        for layer in [self.fc1, self.fc2, self.fc3, self.fc4]:
            nn.init.kaiming_normal_(layer.weight)
            nn.init.constant_(layer.bias, 0.01)
    
    def forward(self, state):
        # Asegurar que sea tensor
        if not isinstance(state, torch.Tensor):
            state = torch.FloatTensor(state)
        
        # Si es un solo estado, agregar dimensi√≥n batch
        if len(state.shape) == 1:
            state = state.unsqueeze(0)
        
        # Paso a trav√©s de la red
        x = self.relu(self.fc1(state))    # 6 ‚Üí 128 + ReLU
        x = self.relu(self.fc2(x))        # 128 ‚Üí 128 + ReLU
        x = self.relu(self.fc3(x))        # 128 ‚Üí 64 + ReLU
        q_values = self.fc4(x)            # 64 ‚Üí n_actions (sin activaci√≥n)
        
        return q_values

BATCH_SIZE=32    
summary(DQN_Network_V1(3, 2), input_size=(BATCH_SIZE, 3, 500, 500))    