In [8]:
%matplotlib inline
import import_ipynb
from RL_model import RL_model
import os
import gym
import torch as th
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

In [15]:
class CustomMLP(BaseFeaturesExtractor):
    """Fully connected feature extractor for a Stable-Baselines3 policy.

    The network is two Linear -> ReLU -> BatchNorm1d stages followed by a
    final Linear layer. Every layer, including the output, is
    ``features_dim`` units wide.

    Parameters
    ----------
    observation_space:
        Observation space of the environment. Only ``shape[0]`` is read to
        size the first layer, so a flat 1-D observation is assumed
        (TODO confirm against the environment).
    features_dim:
        Width of the hidden layers and of the returned feature vector.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 128):
        super().__init__(observation_space, features_dim)
        width = features_dim

        def hidden_stage(in_features: int) -> list:
            # One Linear -> ReLU -> BatchNorm1d stage projecting to `width`.
            return [
                th.nn.Linear(in_features, width),
                th.nn.ReLU(),
                th.nn.BatchNorm1d(width),
            ]

        # Layers are created in network order so parameter initialisation
        # draws from the RNG in the same sequence as the layer indices.
        layers = [
            *hidden_stage(observation_space.shape[0]),
            *hidden_stage(width),
            th.nn.Linear(width, width),
        ]
        self.mlp = th.nn.Sequential(*layers)

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Run ``observations`` through the MLP and return the features."""
        features = self.mlp(observations)
        return features


# 16-QAM constellation keyed by 4-bit labels. The first two bits select the
# real (in-phase) level and the last two the imaginary (quadrature) level.
# Along each axis the labels of neighbouring levels differ in one bit (Gray
# coding). Points are on the integer grid {-3, -1, 1, 3} and are not
# power-normalised.
qam16_gray = {
    i_bits + q_bits: np.complex128(complex(i_level, q_level))
    for i_bits, i_level in (("00", -3), ("01", -1), ("10", 3), ("11", 1))
    for q_bits, q_level in (("00", 3), ("01", 1), ("10", -3), ("11", -1))
}

# Training by DQN
# Build the environment. The meaning of each keyword argument is defined by
# RL_model (imported from the RL_model notebook) and is not visible here.
env = RL_model(
    N_s = 4,
    N_rf = 4,
    N_tx = (4, 4),
    N_rx = (2, 2),
    N_ofdm = 4,
    N_ifft = 512,
    N_c = 450,
    N_gi = 64,
    mapping = qam16_gray,
    N_p = 4,
    static_chan = True,
    SNR = (70, 70),
    cl = 5,
    rays = 2,
    reward_param = 5,
)
env.reset()

models_dir = "models/DQN0"
logdir = "logs"

# exist_ok=True makes directory creation idempotent, so the cell can be re-run
# without a separate (and racy) existence check.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

time_steps = 10000

# DQN hyperparameters; semantics as described in the Stable-Baselines3 DQN
# documentation.
params = {
    'gamma': .9,                      # discount factor
    'batch_size': 100,                # minibatch size per gradient update
    'train_freq': 100,                # environment steps between updates
    'target_update_interval': 200,    # environment steps between target-network updates
    'learning_starts': 1000,          # steps collected before the first update
    # Epsilon is annealed from 0.2 down to 0.01 over the first 20% of
    # `time_steps` and stays at 0.01 afterwards.
    'exploration_fraction': .2,
    'exploration_initial_eps': .2,
    'tau': 0.9,                       # soft (Polyak) target-update coefficient
    'exploration_final_eps': .01,
    'buffer_size': 100000,            # replay buffer capacity
    'verbose': 1,
    'tensorboard_log': logdir
}

# Use CustomMLP as the feature extractor; its width is tied to the number of
# actions the environment reports via `env.actions_n`.
policy_kwargs = dict(
    features_extractor_class = CustomMLP,
    features_extractor_kwargs = dict(features_dim = env.actions_n),
)

model = DQN('MlpPolicy', env, policy_kwargs = policy_kwargs, **params)
model.learn(total_timesteps = time_steps, tb_log_name = "DQN0")

# Save the trained model under models_dir, named after the step budget.
model.save(f"{models_dir}/{time_steps}")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs\DQN0_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 98.2     |
|    ep_rew_mean      | 6.57     |
|    exploration_rate | 0.163    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 14       |
|    time_elapsed     | 26       |
|    total_timesteps  | 393      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 233      |
|    ep_rew_mean      | 6.65     |
|    exploration_rate | 0.0231   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 14       |
|    time_elapsed     | 125      |
|    total_timesteps  | 1862     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0614   |
|    n_updates        | 8        |
-------

In [2]:
%load_ext tensorboard
%tensorboard --logdir logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
