In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [11]:
class GraphConvolutionLayer(nn.Module):
    def __init__(self, in_features, out_features, activation=None):
        super(GraphConvolutionLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.activation = activation

        # Parameters
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        if self.activation is not None:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        # Initialize parameters similar to Keras 'glorot_uniform' (PyTorch default is 'glorot_uniform')
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, node_features, adjacency_matrix):
        # Add self-connections to the adjacency matrix
        adjacency_matrix_with_self_loops = adjacency_matrix + torch.eye(adjacency_matrix.size(0)).to(adjacency_matrix.device)

        # Calculate degree matrix
        degree_matrix = adjacency_matrix_with_self_loops.sum(dim=-1)
        sqrt_degree_matrix = torch.sqrt(degree_matrix)
        inv_sqrt_degree_matrix = torch.diag(1.0 / sqrt_degree_matrix)

        # Symmetric normalization
        normalized_adjacency_matrix = torch.mm(torch.mm(inv_sqrt_degree_matrix, adjacency_matrix_with_self_loops), inv_sqrt_degree_matrix)

        # Perform the convolution operation
        support = torch.mm(node_features, self.weight)
        output_features = torch.mm(normalized_adjacency_matrix, support)

        if self.bias is not None:
            output_features += self.bias

        if self.activation is not None:
            output_features = self.activation(output_features)

        return output_features

In [12]:
class CustomGraphConvolutionRNN(nn.Module):
    def __init__(self, num_nodes, num_node_features, sequence_length, rnn_hidden_units=16, output_units=1):
        super(CustomGraphConvolutionRNN, self).__init__()
        assert num_nodes == sequence_length, "Number of nodes must be equal to sequence length for reshaping."

        # Graph Convolution Layers
        self.gc1 = GraphConvolutionLayer(in_features=num_node_features, out_features=64, activation=F.relu)
        self.gc2 = GraphConvolutionLayer(in_features=64, out_features=32, activation=F.relu)

        # RNN Layer
        self.rnn = nn.GRU(input_size=32, hidden_size=rnn_hidden_units, batch_first=True)

        # Final Dense Layer
        self.fc = nn.Linear(rnn_hidden_units, output_units)

    def forward(self, node_features, adjacency_matrix):
        # Graph Convolution Layers
        x = self.gc1((node_features, adjacency_matrix))
        x = self.gc2((x, adjacency_matrix))

        # Reshape the output for RNN input
        x = x.view(-1, sequence_length, 32)  # Equivalent to tf.reshape in TensorFlow

        # RNN layer
        rnn_out, _ = self.rnn(x)

        # Final Dense Layer for prediction
        predictions = self.fc(rnn_out.contiguous().view(-1, rnn_out.size(2)))

        return predictions

# Define the model
num_nodes = 1000
num_node_features = 1
sequence_length = 1000
model = CustomGraphConvolutionRNN(num_nodes=num_nodes, num_node_features=num_node_features, sequence_length=sequence_length)

In [26]:
class WrappedModel(nn.Module):
    def __init__(self, module):
        super(WrappedModel, self).__init__()
        self.module = module # that I actually define.
    def forward(self, x):
        print(x)
        return self.module(num_nodes=(x[0]), num_node_features=([1]), sequence_length=(x[2]))

In [27]:
model_wrapper = WrappedModel(model)
summary(model_wrapper, [num_nodes, num_node_features, sequence_length])

TypeError: Value after * must be an iterable, not int

In [28]:
print(model)

CustomGraphConvolutionRNN(
  (gc1): GraphConvolutionLayer()
  (gc2): GraphConvolutionLayer()
  (rnn): GRU(32, 16, batch_first=True)
  (fc): Linear(in_features=16, out_features=1, bias=True)
)


In [34]:
def get_model_layers(model):
    layers = []
    for name, param in model.named_parameters():
        layers.append((name, param))
    return layers
layers = get_model_layers(model)

# Print layer names and parameters
for name, param in layers:
    print(f'Layer: {name}, Size: {param.size()}')

total_params = sum(p.numel() for p in model.parameters())
print(f"Total Parameters: {total_params}")

Layer: gc1.weight, Size: torch.Size([1, 64])
Layer: gc1.bias, Size: torch.Size([64])
Layer: gc2.weight, Size: torch.Size([64, 32])
Layer: gc2.bias, Size: torch.Size([32])
Layer: rnn.weight_ih_l0, Size: torch.Size([48, 32])
Layer: rnn.weight_hh_l0, Size: torch.Size([48, 16])
Layer: rnn.bias_ih_l0, Size: torch.Size([48])
Layer: rnn.bias_hh_l0, Size: torch.Size([48])
Layer: fc.weight, Size: torch.Size([1, 16])
Layer: fc.bias, Size: torch.Size([1])
Total Parameters: 4625


In [41]:
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class ComplexFeatureExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space, features_dim):
        super(ComplexFeatureExtractor, self).__init__(observation_space, features_dim)

        # Example complex model structure
        # Replace the following layers with your actual complex model
        self.complex_model = nn.Sequential(
            nn.Conv2d(observation_space.shape[0], 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(7 * 7 * 64, features_dim),
            nn.ReLU()
        )

    def forward(self, observations):
        return self.complex_model(observations)

In [43]:
from stable_baselines3.common.policies import ActorCriticPolicy

class CustomActorCriticPolicy(ActorCriticPolicy):
    def __init__(self, observation_space, action_space, lr_schedule, features_extractor_class=None, **kwargs):
        super(CustomActorCriticPolicy, self).__init__(
            observation_space,
            action_space,
            lr_schedule,
            features_extractor_class=features_extractor_class,
            **kwargs
        )

In [42]:
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

import h5py
from IPython.display import clear_output

import gym
import stable_baselines3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3 import A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.dqn.policies import MlpPolicy
from stable_baselines3.common import results_plotter


In [37]:
# callback from https://stable-baselines.readthedocs.io/en/master/guide/examples.html#using-callback-monitoring-training
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq: (int)
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contain the file created by the ``Monitor`` wrapper.
    :param verbose: (int)
    """
    def __init__(self, check_freq: int, log_dir: str, verbose=1, show_plot: bool=False):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf
        self.show_plot = show_plot

    def _init_callback(self) -> None:
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:

        if self.show_plot and self.n_calls % self.check_freq == 0 and self.n_calls > 5001:
            plotting_average_window = 100

            training_data = pd.read_csv(self.log_dir + 'training.monitor.csv', skiprows=1)

            fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9.6, 4.8))

            ax1.plot(np.convolve(training_data['r'], np.ones(plotting_average_window)/plotting_average_window, mode='valid'))

            ax1.set_xlabel('Episode')
            ax1.set_ylabel('Reward')

            ax2.semilogy(np.convolve(training_data['episode_service_blocking_rate'], np.ones(plotting_average_window)/plotting_average_window, mode='valid'))

            ax2.set_xlabel('Episode')
            ax2.set_ylabel('Episode service blocking rate')

            ax3.semilogy(np.convolve(training_data['episode_bit_rate_blocking_rate'], np.ones(plotting_average_window)/plotting_average_window, mode='valid'))

            ax3.set_xlabel('Episode')
            ax3.set_ylabel('Episode bit rate blocking rate')

            # fig.get_size_inches()
            plt.tight_layout()
            plt.show()

        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print("Num timesteps: {} - ".format(self.num_timesteps), end="")
                    print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))
                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(self.save_path))
                        self.model.save(self.save_path)
                if self.verbose > 0:
                    clear_output(wait=True)

        return True

In [38]:
# loading the topology binary file containing the graph and the k-shortest paths
# if you want to generate your own binary topology file, check examples/create_topology_rmsa.py
topology_name = 'nsfnet_chen'
k_paths = 5

with open(f'/Users/sribalac/Documents/Optical RL Gym/optical-rl-gym-main/examples/topologies/{topology_name}_{k_paths}-paths_6-modulations.h5', 'rb') as f:
    topology = pickle.load(f)


monitor_info_keywords=('episode_service_blocking_rate','episode_bit_rate_blocking_rate')


print(topology)
# node probabilities from https://github.com/xiaoliangchenUCD/DeepRMSA/blob/6708e9a023df1ec05bfdc77804b6829e33cacfe4/Deep_RMSA_A3C.py#L77
node_request_probabilities = np.array([0.01801802, 0.04004004, 0.05305305, 0.01901902, 0.04504505,
                                       0.02402402, 0.06706707, 0.08908909, 0.13813814, 0.12212212,
                                       0.07607608, 0.12012012, 0.01901902, 0.16916917])

# mean_service_holding_time=7.5,
env_args = dict(topology=topology, seed=10,
                allow_rejection=False, # the agent cannot proactively reject a request
                j=1, # consider only the first suitable spectrum block for the spectrum assignment
                mean_service_holding_time=7.5, # value is not set as in the paper to achieve comparable reward values
                episode_length=50, node_request_probabilities=node_request_probabilities)
topology.edges()
# # Create log dir
log_dir = "/Users/sribalac/Documents/Optical RL Gym/optical-rl-gym-main/logs/deeprmsa-dqn/"
os.makedirs(log_dir, exist_ok=True)
callback = SaveOnBestTrainingRewardCallback(check_freq=100, log_dir=log_dir, show_plot=False)

env = gym.make('DeepRMSA-v0', **env_args)

# logs will be saved in log_dir/training.monitor.csv
# in this case, on top of the usual monitored things, we also   monitor service and bit rate blocking rates
env = Monitor(env, log_dir + 'training', info_keywords=monitor_info_keywords)
# for more information about the monitor, check https://stable-baselines.readthedocs.io/en/master/_modules/stable_baselines/bench/monitor.html#Monitor

# here goes the arguments of the policy network to be used
policy_args = dict(net_arch=5*[128]) # we use the elu activation function

agent = A2C(CustomPolicy, env, verbose=1, tensorboard_log="/Users/sribalac/Documents/Optical RL Gym/optical-rl-gym-main/tb/DQN-DeepRMSA-v0/", policy_kwargs=policy_args, gamma=.95, learning_rate=10e-6)


Graph named 'NSFNET_CHEN' with 14 nodes and 22 edges
optical_network_env.set_load(load, mean_service_holding_time) 75.0 7.5
deeprmsa_env.reset()
rmsa_env.reset()
optical_network_env.reset()
rmsa_env.next_service()
optical_network_env._get_node_pair()
0 10 9 6 5 0.08472372498338585 4.201288411172651 87
deeprmsa_env.observation()
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_available_blocks(path) 0
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_

In [40]:
a = agent.learn(total_timesteps=10, callback=callback)

deeprmsa_env.reset()
rmsa_env.reset()
deeprmsa_env.observation()
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_available_blocks(path) 0
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_m