In [1]:
import os
import subprocess
from collections import deque

from comet_ml import Experiment, Optimizer
from comet_ml.exceptions import NoMoreSuggestionsAvailable
import numpy as np
import tqdm
from ns3gym import ns3env

from agents.ddpg.agent import Agent, Config
# from agents.dqn.agent import Agent, Config
# from agents.dqn.model import QNetworkTf
from agents.teacher import Teacher, EnvWrapper

### Basic constants and setting up environment

In [2]:
# Name of the simulation scenario; forwarded to the simulator through sim_args.
scenario = "convergence"

simTime = 10  # seconds simulated per episode
stepTime = 0.01  # seconds covered by one environment step
history_length = 100  # forwarded as "historyLength"; also the agent's input size below

EPISODE_COUNT = 15
# Round away float noise before truncating: a ratio such as 0.3 / 0.1 evaluates
# to 2.9999999999999996 and a bare int() would drop a whole step.
steps_per_ep = int(round(simTime / stepTime, 6))

# Keyword arguments handed to EnvWrapper.
sim_args = {
    "simTime": simTime,
    "envStepTime": stepTime,
    "historyLength": history_length,
    "agentType": Agent.TYPE,
    "nonZeroStart": True,
    "scenario": scenario,  # single source of truth instead of a repeated literal
    "nWifi": 30
}
print("Steps per episode:", steps_per_ep)

# First positional argument of EnvWrapper; later reported as "Instances" in the
# training tags. NOTE(review): presumably the number of parallel simulator
# instances - confirm against EnvWrapper.
threads_no = 1
env = EnvWrapper(threads_no, **sim_args)

Steps per episode: 1000


In [3]:
# Earlier configuration with a single learning rate, kept for reference:
# config = Config(buffer_size=2e3, batch_size=64, gamma=0.99, tau=1e-3, lr=5e-4)

# Baseline configuration with separate actor and critic learning rates.
# The replay buffer size scales with threads_no.
config = Config(
    buffer_size=1.5e4 * threads_no,
    batch_size=512,
    gamma=0.99,
    tau=1e-3,
    lr_actor=6e-5,
    lr_critic=1e-3,
)

In [4]:
# Reset the wrapped environment, then read the spaces it exposes.
env.reset()
ob_space, ac_space = env.observation_space, env.action_space

# Display both spaces so the captured output documents what the agent sees.
print("Observation space shape:", ob_space)
print("Action space shape:", ac_space)

# Sanity check: stop here if the environment did not report an observation space.
assert ob_space is not None

Observation space shape: (1, 'Box(100,)')
Action space shape: (1, 'Box(1,)')


### Creating and training agent

In [5]:
# DISABLED CELL - nothing below is executed.
# Custom TensorFlow Q-network (subclass of QNetworkTf) for the DQN agent whose
# imports are also commented out at the top of the notebook. To use it, restore
# those imports together with this cell.

# import tensorflow as tf

# class Network(QNetworkTf):
#     def _inference(self):
#         with tf.variable_scope("inference_"+self.name):
#             layer = tf.layers.dense(self.input, 128, activation=tf.nn.relu)
# #             layer = tf.layers.dense(layer, 128, activation=tf.nn.relu)
# #             layer = tf.layers.batch_normalization(layer)
#             layer = tf.layers.dense(layer, 64, activation=tf.nn.relu)
#             layer = tf.layers.dense(layer, 32, activation=tf.nn.relu)
# #             layer = tf.layers.dense(layer, 256, activation=tf.nn.relu)
# #             layer = tf.layers.dense(layer, 64, activation=tf.nn.relu)
#             layer = tf.layers.dense(layer, self.action_size)
#         return layer

In [None]:
# Hyper-parameter search over the actor/critic learning rates, driven by the
# Comet optimizer: every suggestion trains a fresh DDPG agent for EPISODE_COUNT
# episodes and reports the logger's last_speed back as the score.

# The Comet API key is a credential and must not be stored in the notebook;
# it is read from the environment instead.
comet_api_key = os.environ.get("COMET_API_KEY")
if not comet_api_key:
    raise RuntimeError("Set the COMET_API_KEY environment variable before running this cell.")
optimizer = Optimizer(comet_api_key)

# Search space declaration. The layer-size parameters below are currently
# disabled (fixed layer sizes are used in the loop instead):
# actor_fc1 integer [1, 4] [2]
# actor_fc2 integer [1, 4] [2]
# actor_fc3 integer [1, 4] [2]

# critic_fc1 integer [1, 4] [2]
# critic_fc2 integer [1, 4] [2]
# critic_fc3 integer [1, 4] [2]

params = """
lr_actor real [1e-5, 5e-4] [6e-5] log
lr_critic real [5e-4, 1e-3] [8e-4] log
"""
optimizer.set_params(params)

# NOTE(review): the second argument is a literal 1 while threads_no is also 1;
# confirm whether it is meant to track threads_no.
teacher = Teacher(env, 1)

while True:
    # Stop cleanly once the optimizer has nothing left to propose instead of
    # ending the cell with an unhandled exception.
    try:
        suggestion = optimizer.get_suggestion()
    except NoMoreSuggestionsAvailable:
        break

    # Fixed layer sizes; the commented expressions derive them from the
    # (disabled) layer-size hyper-parameters.
    actor_l = [128, 64, 32]        # [2**(suggestion["actor_fc1"]+5), 2**(suggestion["actor_fc2"]+4), 2**(suggestion["actor_fc3"]+3)]
    critic_l = [512, 256, 64]      # [2**(suggestion["critic_fc1"]+5), 2**(suggestion["critic_fc2"]+4), 2**(suggestion["critic_fc3"]+3)]

    lr_actor = suggestion["lr_actor"]
    lr_critic = suggestion["lr_critic"]

    # Same settings as the baseline config except for the suggested learning rates.
    config = Config(buffer_size=1.5e4*threads_no, batch_size=512, gamma=0.99, tau=1e-3, lr_actor=lr_actor, lr_critic=lr_critic)

    print("Params:")
    for k, v in suggestion.params.items():
        print(f"{k}: {v}")

    agent = Agent(history_length, action_size=1, config=config, actor_layers=actor_l, critic_layers=critic_l)

    # Train the agent. The positional strings after stepTime are free-form
    # tags describing the run; the config attributes are forwarded as keywords.
    logger = teacher.train(
        agent,
        EPISODE_COUNT,
        simTime,
        stepTime,
        "Inp: collisions mb",
        "Rew: normalized speed",
        "DDPG",
        "Convergence",
        f"Actor: {actor_l}",
        f"Critic: {critic_l}",
        f"Instances: {threads_no}",
        "LSTM",
        **config.__dict__
    )

    # Report the score back
    suggestion.report_score("last_speed", logger.last_speed)
    # Drop the reference so the next iteration starts from a fresh agent.
    del agent


SCRIPT_RUNNING = False

Params:
lr_actor: 6.000000000000003E-5
lr_critic: 8.000000000000004E-4


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/wwydmanski/rl-in-wifi/c01b9132d45244f0b062738662ce07c9

 52%|██████████████████████████████▉                             | 516/1000 [01:59<01:52,  4.31it/s, mb_sent=183.93 Mb]

------- STARTED TRAINING -------


100%|███████████████████████████████████████████████████████████| 1000/1000 [04:52<00:00,  3.41it/s, mb_sent=353.68 Mb]


Sent 353.68 Mb/s.	Mean speed: 35.37 Mb/s	Episode 1/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [05:06<00:00,  3.26it/s, mb_sent=366.67 Mb]


Sent 366.67 Mb/s.	Mean speed: 36.67 Mb/s	Episode 2/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [05:00<00:00,  3.32it/s, mb_sent=368.68 Mb]


Sent 368.68 Mb/s.	Mean speed: 36.87 Mb/s	Episode 3/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [04:59<00:00,  3.34it/s, mb_sent=359.83 Mb]


Sent 359.83 Mb/s.	Mean speed: 35.98 Mb/s	Episode 4/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [04:59<00:00,  3.34it/s, mb_sent=362.53 Mb]


Sent 362.53 Mb/s.	Mean speed: 36.25 Mb/s	Episode 5/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [05:05<00:00,  3.28it/s, mb_sent=363.67 Mb]


Sent 363.67 Mb/s.	Mean speed: 36.37 Mb/s	Episode 6/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [05:10<00:00,  3.22it/s, mb_sent=365.17 Mb]


Sent 365.17 Mb/s.	Mean speed: 36.52 Mb/s	Episode 7/15 finished



100%|███████████████████████████████████████████████████████████| 1000/1000 [05:07<00:00,  3.26it/s, mb_sent=366.13 Mb]


Sent 366.13 Mb/s.	Mean speed: 36.61 Mb/s	Episode 8/15 finished



 43%|█████████████████████████▊                                  | 430/1000 [01:43<02:17,  4.16it/s, mb_sent=153.76 Mb]

In [None]:
# Display the attributes of the most recently assigned Config as a dict.
vars(config)