# Q-Learning Placement Optimizer - Week 6
## HYDATIS DQN Agent for Optimal Pod Placement

**Objective**: Develop a Q-Learning agent that achieves at least a 34% improvement over random placement.

**Success Criteria**:
- DQN agent with PyTorch implementation
- Multi-objective reward function
- Experience replay with a 50,000-entry buffer
- Target improvement: ≥34% vs random baseline

In [None]:
# Import Q-Learning implementation
import sys
sys.path.append('/home/jovyan/work/src')

from ml_models.qlearning.agent import HYDATISDQNAgent, HYDATISPlacementDQN
from ml_models.qlearning.environment import ClusterEnvironment
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Sanity-check the environment: confirm the imports above resolved, then
# show the loaded PyTorch version and the device PyTorch would select.
print("✅ Q-Learning modules imported")
print(f"PyTorch version: {torch.__version__}")
if torch.cuda.is_available():
    print(f"Device available: {torch.device('cuda')}")
else:
    print(f"Device available: {torch.device('cpu')}")

In [None]:
# HYDATIS cluster description passed to the DQN placement optimizer:
# node counts, per-node capacity, and the names of each node group.
cluster_config = dict(
    nodes=6,
    masters=3,
    workers=3,
    cpu_cores_per_node=8,
    memory_gb_per_node=16,
    worker_nodes=['worker-1', 'worker-2', 'worker-3'],
    master_nodes=['master-1', 'master-2', 'master-3'],
)

# Build the placement optimizer from the configuration above.
dqn_placement = HYDATISPlacementDQN(cluster_config)

# Summarise the agent: state/action dimensions, improvement target, and
# the total number of elements across the Q-network's parameters.
print("🧠 DQN Agent initialized:")
print(f"   State size: {dqn_placement.state_size}")
print(f"   Action size: {dqn_placement.action_size}")
print(f"   Target improvement: {dqn_placement.target_improvement:.1%}")
parameter_count = sum(
    tensor.numel() for tensor in dqn_placement.agent.q_network.parameters()
)
print(f"   Network parameters: {parameter_count:,}")

In [None]:
# Exercise the agent once: a random state vector (values drawn uniformly
# from [0.1, 0.8), one per state dimension) plus a small pod request.
sample_cluster_state = np.random.uniform(
    low=0.1, high=0.8, size=dqn_placement.state_size
)
sample_pod_requirements = dict(cpu_request=0.2, memory_request=0.3)

# Request a placement with training disabled.
action, decision = dqn_placement.select_placement_node(
    sample_cluster_state, sample_pod_requirements, training=False
)

# Headline figures of the returned decision.
print("🎯 Sample Placement Decision:")
print(f"   Selected node: {decision['selected_node']}")
overall_quality = decision['placement_quality']['overall_quality']
print(f"   Placement quality: {overall_quality:.3f}")
print(f"   Decision confidence: {decision['decision_confidence']:.3f}")
print(f"   Exploration factor: {decision['exploration_factor']:.3f}")

# List each worker's Q-value, flagging the index the agent picked.
print("\n📊 Q-Values by Node:")
node_q_pairs = zip(cluster_config['worker_nodes'], decision['q_values'])
for i, (node, q_val) in enumerate(node_q_pairs):
    if i == action:
        marker = '  ← SELECTED'
    else:
        marker = ''
    print(f"   {node}: {q_val:.4f}{marker}")

In [None]:
# Ask the optimizer for its analysis of the sample state used above.
insights = dqn_placement.get_placement_insights(sample_cluster_state)

# Recommendation and its confidence.
print("🔍 Detailed Placement Analysis:")
print(f"   Recommended node: {insights['recommended_node']}")
print(f"   Confidence score: {insights['confidence_score']:.3f}")

# Cluster-wide utilisation figures reported by the optimizer.
print("\n📈 Cluster Utilization:")
util = insights['cluster_utilization']
print(f"   Average CPU: {util['average_cpu']:.1%}")
print(f"   Average Memory: {util['average_memory']:.1%}")
print(f"   CPU variance: {util['cpu_variance']:.4f}")

# First three entries of the ranking, numbered from 1.
print("\n🏆 Node Rankings:")
top_ranked = insights['node_rankings'][:3]
for rank, node_info in enumerate(top_ranked, start=1):
    ranking_line = (
        f"   {rank}. {node_info['node']}: Q={node_info['q_value']:.3f}, "
        f"CPU={node_info['cpu_load']:.1%}, Mem={node_info['memory_load']:.1%}"
    )
    print(ranking_line)