In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

class SDNEnvironment:
    def __init__(self, num_switches, num_controllers):
        self.num_switches = num_switches
        self.num_controllers = num_controllers
        self.state_size = num_switches + num_switches  # Includes traffic load
        self.state = np.random.randint(0, num_controllers, size=num_switches)
        self.traffic_loads = np.random.randint(1, 100, size=num_switches)

    def reset(self):
        self.state = np.random.randint(0, self.num_controllers, size=self.num_switches)
        self.traffic_loads = np.random.randint(1, 100, size=self.num_switches)
        # Concatenate and ensure fixed size
        full_state = np.concatenate([self.state, self.traffic_loads])
        assert full_state.shape[0] == self.state_size, "State size mismatch in reset!"
        return full_state

    def step(self, actions):
        # Ensure actions match the number of switches
        if np.ndim(actions) == 0 or actions.shape[0] != self.num_switches:
            actions = np.resize(actions, self.num_switches)

        self.state = actions
        self.traffic_loads = np.random.randint(1, 100, size=self.num_switches)

        # Concatenate and ensure fixed size
        full_state = np.concatenate([self.state, self.traffic_loads])
        #print(f"self.state shape: {self.state.shape}")
        #print(f"self.traffic_loads shape: {self.traffic_loads.shape}")
        #print(f"full_state shape: {full_state.shape}, expected: {self.state_size}")
        assert full_state.shape[0] == self.state_size, "State size mismatch in step!"

        # Reward and done signal
        reward = -np.mean(self.traffic_loads)  # Example: Minimize average traffic load
        done = np.random.rand() < 0.1  # Example: Random episode termination
        return full_state, reward, done




# Q-Network with LSTM for Temporal Dependencies
class LSTM_QNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTM_QNetwork, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h_lstm, _ = self.lstm(x)
        return self.fc(h_lstm[:, -1, :])  # Use the last hidden state

# Replay Buffer
class ReplayBuffer:
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def add(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

# tDQN Agent
class TemporalDQNAgent:
    def __init__(self, state_dim, action_dim, hidden_dim=64, gamma=0.99, lr=1e-3, sequence_length=5):
        self.gamma = gamma
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.1
        self.sequence_length = sequence_length

        self.q_network = LSTM_QNetwork(state_dim, hidden_dim, action_dim)
        self.target_network = LSTM_QNetwork(state_dim, hidden_dim, action_dim)
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)

        self.replay_buffer = ReplayBuffer()

    def select_action(self, state_sequence):
        if random.random() < self.epsilon:
            return np.random.randint(0, self.q_network.fc.out_features, size=state_sequence.shape[0])
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state_sequence)
            q_values = self.q_network(state_tensor)
            return torch.argmax(q_values, dim=1).numpy()

    def train(self, batch_size):
        if len(self.replay_buffer) < batch_size:
            return

        transitions = self.replay_buffer.sample(batch_size)
        states, actions, rewards, next_states, dones = zip(*transitions)

        states = torch.FloatTensor(states)
        actions = torch.LongTensor(actions)  # Ensure actions is a LongTensor
        actions = actions.view(-1, 1)  # Reshape to (batch_size, 1)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)
        dones = torch.FloatTensor(dones)

        # Debugging shapes
        #print(f"states shape: {states.shape}")
        #print(f"actions shape: {actions.shape}")

        # Gather Q-values for the actions taken
        q_values = self.q_network(states).gather(1, actions).squeeze()
        with torch.no_grad():
            next_q_values = self.target_network(next_states).max(1)[0]
            target_q_values = rewards + self.gamma * next_q_values * (1 - dones)

        loss = nn.MSELoss()(q_values, target_q_values.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()



    def update_target_network(self):
        self.target_network.load_state_dict(self.q_network.state_dict())

# Training
env = SDNEnvironment(num_switches=10, num_controllers=3)

state_dim = 2 * env.num_switches  # Switch states + traffic loads
action_dim = env.num_controllers
agent = TemporalDQNAgent(state_dim, action_dim)

num_episodes = 1000
batch_size = 32
sequence_length = 100

for episode in range(num_episodes):
    state = env.reset()
    state_sequence = deque(maxlen=sequence_length)
    for _ in range(sequence_length):
        state_sequence.append(state)

    total_reward = 0

    for t in range(100):
        try:
            # Convert deque to stacked array
            state_seq_array = np.stack(state_sequence, axis=0)  # Shape: [sequence_length, feature_size]

            # Add batch dimension for LSTM: [batch_size, sequence_length, feature_size]
            action = agent.select_action(state_seq_array[None, :, :])
            next_state, reward, done = env.step(action)
            state_sequence.append(next_state)

            # Prepare next state sequence
            next_state_seq_array = np.stack(state_sequence, axis=0)
            agent.replay_buffer.add((state_seq_array, action, reward, next_state_seq_array, done))
            agent.train(batch_size)

            total_reward += reward
            if done:
                break

        except ValueError as e:
            print(f"ValueError: {e}")
            print(f"state shape: {state.shape}")
            print(f"state_sequence shapes: {[s.shape for s in state_sequence]}")
            break

    # Update epsilon for exploration
    agent.epsilon = max(agent.epsilon * agent.epsilon_decay, agent.epsilon_min)
    if (episode + 1) % 50 == 0:
        print(f"Episode {episode + 1}, Total Reward: {total_reward}")

print("Training complete.")



RuntimeError: Size does not match at dimension 0 expected index [96, 1] to be smaller than self [32, 3] apart from dimension 1

In [2]:
state_sequence


deque([array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       array([ 1,  2,  0,  1,  1,  1,  2,  1,  0,  1, 91, 45, 61, 70, 44, 18, 68,
              29, 75, 37]),
       arr

In [4]:
# After training, print final switch-to-controller assignment
state = env.reset()  # Get the initial state
state_sequence = deque(maxlen=sequence_length)

for _ in range(sequence_length):
    state_sequence.append(state)

# Stack the state sequence for LSTM input
state_seq_array = np.stack(state_sequence, axis=0)  # Shape: [sequence_length, feature_size]

# Add batch dimension for LSTM input: [batch_size, sequence_length, feature_size]
final_assignment = agent.select_action(state_seq_array[None, :, :])

# Print the final assignment of switches to controllers
print("Final Assignment of Switches to Controllers:")
for switch_idx, controller_idx in enumerate(final_assignment):
    print(f"Switch {switch_idx + 1} -> Controller {controller_idx}")

Final Assignment of Switches to Controllers:
Switch 1 -> Controller 0
