# Load and set up modules

In [1]:
# Go to project home directory
# NOTE(review): this path is specific to one machine/user; the relative paths
# used later in the notebook (data file, log file) are resolved against it.
%cd ~/Documents/dqn4wirelesscontrol/

# System built-in modules
import time
import sys
# Keep a handle on the original stdout stream so it can be restored after
# sys.stdout is redirected to a log file later in the notebook.
sys_stdout = sys.stdout

# Project dependency modules
import pandas as pd
pd.set_option('mode.chained_assignment', None)  # block warnings due to DataFrame value assignment
import lasagne

# Project modules
from sleep_control.traffic_emulator import TrafficEmulator
from sleep_control.traffic_server import TrafficServer
from sleep_control.controller import QController, DummyController, NController
from sleep_control.integration import Emulation
from sleep_control.env_models import SJTUModel

from rl.qtable import QAgent
from rl.qnn_theano import QAgentNN
from rl.mixin import PhiMixin, DynaMixin

# Composite classes
class Phi_QAgentNN(PhiMixin, QAgentNN):
    """QAgentNN composed with PhiMixin.

    PhiMixin is listed first so that its methods take precedence over
    QAgentNN's in the method resolution order.
    """
    def __init__(self, **kwargs):
        # The first argument to super() must be this class itself. The
        # previous code passed `QAgentNNPhi`, a name that does not exist,
        # which raised NameError as soon as the agent was instantiated.
        super(Phi_QAgentNN, self).__init__(**kwargs)

class Dyna_QAgent(DynaMixin, QAgent):
    """QAgent (from rl.qtable) composed with DynaMixin; DynaMixin comes first in the MRO."""
    def __init__(self, **kwargs):
        # Cooperative initialisation: hand every keyword argument to the
        # next class in the MRO.
        parent = super(Dyna_QAgent, self)
        parent.__init__(**kwargs)
        
class Dyna_QAgentNN(DynaMixin, QAgentNN):
    """QAgentNN (from rl.qnn_theano) composed with DynaMixin; DynaMixin comes first in the MRO."""
    def __init__(self, **kwargs):
        # Cooperative initialisation: hand every keyword argument to the
        # next class in the MRO.
        parent = super(Dyna_QAgentNN, self)
        parent.__init__(**kwargs)

/home/lewis/Documents/dqn4wirelesscontrol




---

# Load data

In [None]:
# Load the session trace from its raw, header-less CSV form and derive the
# per-session columns used by the traffic emulator.
session_df = pd.read_csv(filepath_or_buffer='./sleep_control/data/net_traffic_nonull.dat',
                         sep=',',
                         names=['uid', 'location', 'startTime_unix', 'duration_ms',
                                'domainProviders', 'domainTypes', 'domains',
                                'bytesByDomain', 'requestsByDomain'])
session_df.index.name = 'sessionID'

# End time = start time + duration. startTime_unix is treated as milliseconds
# (see unit='ms' below), the same unit as duration_ms.
session_df['endTime_unix'] = session_df['startTime_unix'] + session_df['duration_ms']
session_df['startTime_datetime'] = pd.to_datetime(session_df['startTime_unix'], unit='ms')  # convert start time to readable datetimes
session_df['endTime_datetime'] = pd.to_datetime(session_df['endTime_unix'], unit='ms')

# 'bytesByDomain' / 'requestsByDomain' hold ';'-separated numbers, one per
# domain; collapse each into a single per-session total.
session_df['totalBytes'] = session_df['bytesByDomain'].map(lambda s: sum(float(v) for v in s.split(';')))  # sum bytes across domains
session_df['totalRequests'] = session_df['requestsByDomain'].map(lambda s: sum(float(v) for v in s.split(';')))  # sum requests across domains

# Sort chronologically. DataFrame.sort() is deprecated (and removed in later
# pandas releases); sort_values() is its replacement.
session_df.sort_values(by=['startTime_datetime'], ascending=True, inplace=True)

# Inter-arrival time between consecutive sessions at the same location
# (requires the chronological sort above).
session_df['interArrivalDuration_datetime'] = session_df.groupby('location')['startTime_datetime'].diff()  # group-wise diff
session_df['interArrivalDuration_ms'] = session_df.groupby('location')['startTime_unix'].diff()  # group-wise diff

# Keep only the sessions recorded at location '第三食堂' ("Third Canteen").
session_df = session_df[session_df['location']=='第三食堂']

---

# Run experiment

In [9]:
# Agent
# NOTE(review): each action looks like a (sleep_flag, serving_command) pair --
# confirm against the controller / traffic server implementation.
actions = [(True, None), (False, 'serve_all')]
gamma, alpha = 0.9, 0.9  # TD backup
explore_strategy, epsilon = 'epsilon', 0.02  # exploration
reward_scaling, reward_scaling_update, rs_period = 1, 'adaptive', 32  # reward scaling
phi_length = 5
# State layout: 1 x phi_length x 5; the last dimension (3+2) matches the five
# per-feature (min, max) ranges listed in range_state_slice.
dim_state = (1, phi_length, 3+2)
range_state_slice = [(0, 10), (0, 10), (0, 10), (0, 1), (0, 1)]
range_state = [[range_state_slice]*phi_length]
momentum, learning_rate = 0.9, 0.01  # SGD
num_buffer, memory_size, batch_size, update_period, freeze_period  = 2, 200, 100, 4, 16

# Env
log_file_path = "sleep_control/experiments/log/message_2016-6-16_Trace_N01_b5.log"
log_file = open(log_file_path,"w")  # closed by the cell that runs the emulation
time_step, n_days = pd.Timedelta(seconds=2), 7  # epoch length, total time length
# Number of epochs in n_days, derived from time_step instead of hard-coding
# the 2-second step; int() keeps it an integer under both Python 2 and 3.
TOTAL_EPOCHS = int(pd.Timedelta(days=n_days) / time_step)
# Start the trace `backoff` epochs before the nominal start time
# (presumably so the replay buffers and the phi window can fill first).
backoff = num_buffer*memory_size+phi_length
head_datetime = pd.to_datetime("2014-10-15 09:40:00") - time_step*backoff
tail_datetime = head_datetime + pd.Timedelta(days=1)*n_days*5
beta = 0.5  # R = (1-beta)*ServiceReward + beta*Cost

In [None]:
# Build the emulation pipeline: traffic emulator -> traffic server -> controller.
te = TrafficEmulator(session_df=session_df, time_step=time_step, head_datetime=head_datetime, tail_datetime=tail_datetime, verbose=2)
ts = TrafficServer(cost=5, verbose=2)
agent = Phi_QAgentNN(
    phi_length=phi_length,
    dim_state=dim_state, range_state=range_state,
    f_build_net=None,
    batch_size=batch_size, learning_rate=learning_rate, momentum=momentum,
    reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update, rs_period=rs_period,
    update_period=update_period, freeze_period=freeze_period,
    memory_size=memory_size, num_buffer=num_buffer,
    actions=actions, alpha=alpha, gamma=gamma,
    explore_strategy=explore_strategy, epsilon=epsilon,
    verbose=2)
c = QController(agent=agent)
# Use the imported `Emulation` class directly: only the class was imported
# (`from sleep_control.integration import Emulation`), so the package name
# `sleep_control` is not bound and the dotted path raised NameError.
emu = Emulation(te=te, ts=ts, c=c, beta=beta)

In [None]:
t = time.time()
sys.stdout = log_file
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-15 09:26:30
  tail=2014-11-19 09:26:30
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


2886.1828379631042

In [2]:
# Manual recovery: point sys.stdout back at the stream saved in the first cell.
# The original line assigned the undefined name `std_out` to `sys_stdout`,
# which raises NameError and would not restore stdout in any case.
sys.stdout = sys_stdout