In [9]:
import tensorflow as tf
import numpy as np
import gym

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Input, GRU, Dense

import stable_baselines3
from stable_baselines3 import A2C
from stable_baselines3.common.env_checker import check_env

In [10]:
class GraphConvolutionLayer(Layer):
    """Graph convolution layer computing ``activation(D^-1/2 (A + I) D^-1/2 X W + b)``.

    The layer is called on a two-element list ``[node_features, adjacency_matrix]``.
    Both unbatched inputs (``(N, F)`` features with an ``(N, N)`` adjacency) and
    inputs with leading batch axes (``(..., N, F)`` with ``(..., N, N)``) are
    handled; the node count is always read from the LAST adjacency axis.

    :param units: number of output features per node.
    :param activation: any identifier accepted by ``tf.keras.activations.get``.
        ``None`` resolves to the linear (identity) activation.
    :param use_bias: whether to add a trainable bias vector of shape ``(units,)``.
        Defaults to ``True``, matching the previous behaviour where a bias was
        always created.
    """

    def __init__(self, units, activation=None, use_bias=True, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.use_bias = use_bias
        # Weights are created lazily in build(), once the feature size is known.
        self.kernel = None
        self.bias = None

    def build(self, input_shape):
        """Create the kernel (and optional bias) from the node-feature shape."""
        # input_shape is [node_features_shape, adjacency_shape].
        num_input_features = input_shape[0][-1]
        self.kernel = self.add_weight(
            shape=(num_input_features, self.units),
            initializer='glorot_uniform',
            name='kernel',
            trainable=True
        )
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(self.units,),
                initializer='zeros',
                name='bias',
                trainable=True
            )
        else:
            self.bias = None
        super().build(input_shape)

    def call(self, inputs):
        """Apply one round of symmetric-normalised neighbourhood aggregation.

        :param inputs: ``[node_features, adjacency_matrix]``.
        :return: tensor with the shape of ``node_features`` except that the last
            axis has size ``units``.
        """
        node_features, adjacency_matrix = inputs

        # An integer/boolean adjacency cannot be added to the float identity
        # below, so bring it to the feature dtype first.
        adjacency_matrix = tf.cast(adjacency_matrix, node_features.dtype)

        # Add self-connections. The identity is sized by the LAST axis so that a
        # leading batch axis is not mistaken for the number of nodes; it then
        # broadcasts over any leading axes.
        num_nodes = tf.shape(adjacency_matrix)[-1]
        adjacency_with_self_loops = adjacency_matrix + tf.eye(
            num_nodes, dtype=adjacency_matrix.dtype
        )

        # Node degrees (row sums) and their inverse square roots.
        degree = tf.reduce_sum(adjacency_with_self_loops, axis=-1)
        inv_sqrt_degree = 1.0 / tf.sqrt(degree)

        # Symmetric normalisation D^-1/2 (A + I) D^-1/2. Scaling rows and columns
        # by broadcasting gives the same entries as multiplying by dense
        # diagonal matrices, without the two extra N x N matmuls.
        normalized_adjacency = (
            tf.expand_dims(inv_sqrt_degree, axis=-1)
            * adjacency_with_self_loops
            * tf.expand_dims(inv_sqrt_degree, axis=-2)
        )

        # Feature transform followed by neighbourhood aggregation.
        support = tf.matmul(node_features, self.kernel)
        output_features = tf.matmul(normalized_adjacency, support)

        if self.bias is not None:
            output_features += self.bias

        if self.activation is not None:
            output_features = self.activation(output_features)

        return output_features

    def compute_output_shape(self, input_shape):
        """Return the node-feature shape with its last axis replaced by ``units``."""
        node_feature_shape = tuple(tf.TensorShape(input_shape[0]).as_list())
        return node_feature_shape[:-1] + (self.units,)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation),
            'use_bias': self.use_bias,
        })
        return config

In [11]:
# Build and compile a Keras functional model: two GraphConvolutionLayer stages,
# a reshape into sequences, a GRU, and a final Dense layer.
#
# node_features: feature matrix of shape (num_nodes, num_node_features)
# adjacency_matrix: adjacency matrix of shape (num_nodes, num_nodes)
# sequence_length: the length of the time series data for each node
num_nodes = 1000
num_node_features = 1
sequence_length = 30

# Random placeholder data. In this cell the two tensors below are only used to
# read shapes for the Input declarations; they are never fed to the model here.
node_features = tf.random.uniform((num_nodes, num_node_features))

# Random 0/1 entries (maxval is exclusive for integer dtypes).
# NOTE(review): this tensor is int32 while the matching Input below is declared
# float32 — presumably it needs a cast before being passed to the model; confirm.
adjacency_matrix = tf.random.uniform((num_nodes, num_nodes), minval=0, maxval=2, dtype=tf.int32)
# Ensure symmetry for undirected graph
adjacency_matrix = tf.maximum(adjacency_matrix, tf.transpose(adjacency_matrix))

# Define the inputs
# Keras prepends a batch axis to `shape`, so (per the summary printed below)
# these are (None, 1) and (None, 1000, 1000).
# NOTE(review): the feature input has no explicit node axis, so the batch axis
# appears to double as the node axis — TODO confirm the intended input layout.
node_features_input = Input(shape=(node_features.shape[1],), dtype=tf.float32)
adjacency_matrix_input = Input(shape=(adjacency_matrix.shape[0], adjacency_matrix.shape[1]), dtype=tf.float32)

# Create the first GraphConvolutionLayer
gc1 = GraphConvolutionLayer(units=64, activation='relu')([node_features_input, adjacency_matrix_input])

# Create the second GraphConvolutionLayer
gc2 = GraphConvolutionLayer(units=32, activation='relu')([gc1, adjacency_matrix_input])

# Assume that the output of the second GCN layer is a sequence for the RNN layer
# We will reshape the GCN output to be compatible with RNN input
# This requires that the number of nodes is equal to the sequence length
# NOTE(review): num_nodes is 1000 and sequence_length is 30 here, so the stated
# requirement does not hold and 1000 is not a multiple of 30; the -1 axis would
# have to absorb the mismatch across samples — TODO confirm the intended shapes.
gcn_output_sequence = tf.reshape(gc2, (-1, sequence_length, 32))  # Reshape to (batch_size, sequence_length, num_features)

# Create the RNN layer, using GRU for example
rnn_layer = GRU(units=16, return_sequences=True)(gcn_output_sequence)  # You can set return_sequences to False if needed

# Add a final Dense layer for prediction
output_units = 1  # Adjust this based on your specific task
predictions = Dense(output_units)(rnn_layer)

# Create the model
# `custom_preprocessing_model` is a module-level name that the CustomPolicy
# class in a later cell reads as a global.
custom_preprocessing_model = Model(inputs=[node_features_input, adjacency_matrix_input], outputs=predictions)

# Compile the model
custom_preprocessing_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Print the model summary to check the architecture
custom_preprocessing_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 1000, 1000)  0           []                               
                                ]                                                                 
                                                                                                  
 graph_convolution_layer_2 (Gra  (None, 1000, 64)    128         ['input_3[0][0]',                
 phConvolutionLayer)                                              'input_4[0][0]']                
                                                                                            

In [12]:
from optical_rl_gym.utils import custom_print
from stable_baselines3.common.policies import ActorCriticPolicy

class CustomPolicy(ActorCriticPolicy):
    """Actor-critic policy that holds a reference to the notebook's Keras model.

    The model is taken from the module-level ``custom_preprocessing_model``
    global, so the cell that builds it must have been executed first.

    NOTE(review): ``step`` looks like the stable-baselines (v2) policy API; the
    stable_baselines3 ``ActorCriticPolicy`` is PyTorch-based and is driven through
    ``forward``/``predict`` — confirm that this override is ever invoked and that
    a Keras model can consume the observations it would receive.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument to the base policy, then attach the Keras model."""
        super().__init__(*args, **kwargs)

        # Shared reference to the globally built model (not a per-policy copy).
        self.custom_preprocessing_model = custom_preprocessing_model

    def step(self, obs, state=None, mask=None, deterministic=False):
        """Log ``obs``, pass it through the Keras model, and delegate to the parent ``step``."""
        custom_print("CustomPolicy", obs)

        # Run the observation through the graph/recurrent model first ...
        graph_encoded_obs = self.custom_preprocessing_model(obs)

        # ... then hand the result to the base class implementation.
        return super().step(graph_encoded_obs, state, mask, deterministic)

In [13]:
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

import h5py
from IPython.display import clear_output

import gym
import stable_baselines3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3 import A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.dqn.policies import MlpPolicy
from stable_baselines3.common import results_plotter


In [14]:
# callback from https://stable-baselines.readthedocs.io/en/master/guide/examples.html#using-callback-monitoring-training
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq: (int) number of callback calls between two checks.
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contain the file created by the ``Monitor`` wrapper.
    :param verbose: (int) values > 0 print progress; the best model is saved
      regardless of verbosity.
    :param show_plot: (bool) when True, plot the smoothed reward and blocking
      rates read from ``training.monitor.csv`` at each check once more than
      ``PLOT_AFTER_N_CALLS`` calls have been made.
    """

    # Moving-average window (in episodes) used for the progress plots.
    PLOT_AVERAGE_WINDOW = 100
    # Plots are only produced once n_calls exceeds this value.
    PLOT_AFTER_N_CALLS = 5001

    def __init__(self, check_freq: int, log_dir: str, verbose=1, show_plot: bool=False):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf
        self.show_plot = show_plot

    def _init_callback(self) -> None:
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _plot_training_progress(self) -> None:
        """Plot moving averages of reward and blocking rates from the monitor CSV."""
        window = self.PLOT_AVERAGE_WINDOW
        smoothing_kernel = np.ones(window) / window

        # os.path.join works whether or not log_dir ends with a path separator.
        monitor_csv = os.path.join(self.log_dir, 'training.monitor.csv')
        # The first line of the monitor file is skipped (skiprows=1), as before.
        training_data = pd.read_csv(monitor_csv, skiprows=1)

        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9.6, 4.8))

        ax1.plot(np.convolve(training_data['r'], smoothing_kernel, mode='valid'))
        ax1.set_xlabel('Episode')
        ax1.set_ylabel('Reward')

        ax2.semilogy(np.convolve(training_data['episode_service_blocking_rate'], smoothing_kernel, mode='valid'))
        ax2.set_xlabel('Episode')
        ax2.set_ylabel('Episode service blocking rate')

        ax3.semilogy(np.convolve(training_data['episode_bit_rate_blocking_rate'], smoothing_kernel, mode='valid'))
        ax3.set_xlabel('Episode')
        ax3.set_ylabel('Episode bit rate blocking rate')

        plt.tight_layout()
        plt.show()
        # Release the figure; otherwise one figure per check stays alive for the
        # whole training run.
        plt.close(fig)

    def _on_step(self) -> bool:
        """Every ``check_freq`` calls, optionally plot and save a new best model.

        :return: always True, so training is never interrupted by this callback.
        """
        is_check_step = self.n_calls % self.check_freq == 0

        if self.show_plot and is_check_step and self.n_calls > self.PLOT_AFTER_N_CALLS:
            self._plot_training_progress()

        if is_check_step:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print("Num timesteps: {} - ".format(self.num_timesteps), end="")
                    print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))
                # New best model: remember the reward and save the agent
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(self.save_path))
                    # Saving must not depend on verbosity: previously this call
                    # was nested under the verbose check and was skipped when
                    # verbose == 0.
                    self.model.save(self.save_path)
                if self.verbose > 0:
                    # wait=True defers clearing until new output arrives.
                    clear_output(wait=True)

        return True

In [15]:
# Root of the local optical-rl-gym checkout. Every path below is derived from it.
# Set the OPTICAL_RL_GYM_ROOT environment variable to run on another machine;
# the default keeps the original location.
PROJECT_ROOT = os.environ.get(
    'OPTICAL_RL_GYM_ROOT',
    '/Users/sribalac/Documents/Optical RL Gym/optical-rl-gym-main',
)

# loading the topology binary file containing the graph and the k-shortest paths
# if you want to generate your own binary topology file, check examples/create_topology_rmsa.py
topology_name = 'nsfnet_chen'
k_paths = 5

topology_path = os.path.join(
    PROJECT_ROOT, 'examples', 'topologies',
    f'{topology_name}_{k_paths}-paths_6-modulations.h5',
)
# SECURITY: despite the .h5 extension the file is read with pickle, and
# unpickling can execute arbitrary code. Only load topology files you trust.
with open(topology_path, 'rb') as f:
    topology = pickle.load(f)


# extra per-episode fields the Monitor wrapper copies from `info` into its CSV
monitor_info_keywords=('episode_service_blocking_rate','episode_bit_rate_blocking_rate')


print(topology)
# node probabilities from https://github.com/xiaoliangchenUCD/DeepRMSA/blob/6708e9a023df1ec05bfdc77804b6829e33cacfe4/Deep_RMSA_A3C.py#L77
node_request_probabilities = np.array([0.01801802, 0.04004004, 0.05305305, 0.01901902, 0.04504505,
                                       0.02402402, 0.06706707, 0.08908909, 0.13813814, 0.12212212,
                                       0.07607608, 0.12012012, 0.01901902, 0.16916917])

env_args = dict(topology=topology, seed=10,
                allow_rejection=False, # the agent cannot proactively reject a request
                j=1, # consider only the first suitable spectrum block for the spectrum assignment
                mean_service_holding_time=7.5, # value is not set as in the paper to achieve comparable reward values
                episode_length=50, node_request_probabilities=node_request_probabilities)

# Create log dir. The trailing '' component keeps the trailing path separator,
# so code that builds file names as `log_dir + name` keeps working.
log_dir = os.path.join(PROJECT_ROOT, 'logs', 'deeprmsa-dqn', '')
os.makedirs(log_dir, exist_ok=True)
callback = SaveOnBestTrainingRewardCallback(check_freq=100, log_dir=log_dir, show_plot=False)

env = gym.make('DeepRMSA-v0', **env_args)

# logs will be saved in log_dir/training.monitor.csv
# in this case, on top of the usual monitored things, we also monitor service and bit rate blocking rates
env = Monitor(env, os.path.join(log_dir, 'training'), info_keywords=monitor_info_keywords)
# for more information about the monitor, check https://stable-baselines.readthedocs.io/en/master/_modules/stable_baselines/bench/monitor.html#Monitor

# here goes the arguments of the policy network to be used
# Only the layer sizes are configured (five layers of 128 units). No activation
# function is passed, so the policy class default applies.
policy_args = dict(net_arch=5*[128])

# NOTE(review): the log directories are named after DQN although the agent is
# A2C; the names are kept unchanged so existing logs stay in place.
tensorboard_dir = os.path.join(PROJECT_ROOT, 'tb', 'DQN-DeepRMSA-v0', '')

# learning_rate=10e-6 is 1e-5.
agent = A2C(CustomPolicy, env, verbose=1, tensorboard_log=tensorboard_dir, policy_kwargs=policy_args, gamma=.95, learning_rate=10e-6)


Graph named 'NSFNET_CHEN' with 14 nodes and 22 edges
optical_network_env.set_load(load, mean_service_holding_time) 75.0 7.5
deeprmsa_env.reset()
rmsa_env.reset()
optical_network_env.reset()
rmsa_env.next_service()
optical_network_env._get_node_pair()
0 10 9 6 5 0.08472372498338585 4.201288411172651 87
deeprmsa_env.observation()
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_available_blocks(path) 0
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_

In [16]:
# Train the agent for 10 timesteps with the best-model callback attached and
# keep the value returned by learn() in `a`.
# NOTE(review): `callback` was created with check_freq=100, so a 10-timestep run
# presumably never reaches a check/save — confirm this is only a smoke test.
a = agent.learn(total_timesteps=10, callback=callback)

deeprmsa_env.reset()
rmsa_env.reset()
deeprmsa_env.observation()
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_available_blocks(path) 0
get_available_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_modulation=None)
get_number_slots(path) Path(path_id=290, node_list=['6', '10'], hops=1, length=1050, best_modulation=Modulation(name='QPSK', maximum_length=2000, spectral_efficiency=2, minimum_osnr=12.6, inband_xt=-17), current_m

In [None]:
# Emit a separator line through optical_rl_gym's custom_print helper
# (presumably it prints its arguments — confirm against optical_rl_gym.utils).
custom_print("==============================")