In [11]:
# Switch to the project directory; the data and log paths used in later cells are relative.
# NOTE(review): this location is machine-specific — adjust it when running elsewhere.
%cd ~/ipython-notebook/dqn4wirelesscontrol/
import sys
import time
import pandas as pd
import lasagne
pd.set_option('mode.chained_assignment', None)  # silence pandas chained-assignment warnings triggered by DataFrame value assignment
# Project modules
from sleep_control.integration import Emulation
# Imported as modules: later cells refer to them by full dotted path
# (sleep_control.traffic_emulator.TrafficEmulator, sleep_control.traffic_server.TrafficServer).
import sleep_control.traffic_emulator
import sleep_control.traffic_server
from sleep_control.controller import QController, DummyController
import qlearning.qtable
import qlearning.qnn
from qlearning.qtable import QAgent
from qlearning.qnn import QAgentNN
from qlearning.mixin import PhiMixin

/home/admin-326/Data/ipython-notebook/dqn4wirelesscontrol


# Setting up data

In [12]:
# Remember the notebook's stdout: the run cells point sys.stdout at a log file
# and assign this object back once the emulation loop has finished.
std_out = sys.stdout

In [13]:
# Load the session log; column names are supplied explicitly through `names`.
session_df = pd.read_csv(
    filepath_or_buffer='./sleep_control/data/net_traffic_nonull.dat',
    sep=',',
    names=['uid', 'location', 'startTime_unix', 'duration_ms', 'domainProviders',
           'domainTypes', 'domains', 'bytesByDomain', 'requestsByDomain'])
session_df.index.name = 'sessionID'
# Session end = start + duration; both columns are treated as milliseconds (see unit='ms' below).
session_df['endTime_unix'] = session_df['startTime_unix'] + session_df['duration_ms']
session_df['startTime_datetime'] = pd.to_datetime(session_df['startTime_unix'], unit='ms')  # convert start time to readable datetimes
session_df['endTime_datetime'] = pd.to_datetime(session_df['endTime_unix'], unit='ms')
# The per-domain columns hold ';'-separated numbers; reduce each entry to its total.
session_df['totalBytes'] = session_df['bytesByDomain'].map(lambda field: sum(float(v) for v in field.split(';')))  # sum bytes across domains
session_df['totalRequests'] = session_df['requestsByDomain'].map(lambda field: sum(float(v) for v in field.split(';')))  # sum requests across domains
# DataFrame.sort is deprecated (and removed in later pandas releases); sort_values is its replacement.
session_df.sort_values(by='startTime_datetime', ascending=True, inplace=True)  # get it sorted
# Rows are in start-time order at this point, so a per-location diff yields the gap
# between each session and the previous session at the same location.
session_df['interArrivalDuration_datetime'] = session_df.groupby('location')['startTime_datetime'].diff()  # group-wise diff
session_df['interArrivalDuration_ms'] = session_df.groupby('location')['startTime_unix'].diff()  # group-wise diff



In [14]:
class QAgentNNPhi(PhiMixin, QAgentNN):
    """Agent class composed from PhiMixin and QAgentNN.

    PhiMixin is listed first, so its attributes take precedence over those of
    QAgentNN in the method resolution order. The class adds no behavior of
    its own beyond forwarding construction arguments.

    NOTE(review): judging by the `phi_length` argument passed by the run
    cells, PhiMixin presumably handles a window of recent observations —
    confirm in qlearning.mixin.
    """
    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to the next __init__ in the MRO."""
        super(QAgentNNPhi, self).__init__(**kwargs)

---

In [17]:
# Configuration for the run logged as "BW_M200": memory_size = 200, phi_length = 5.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 200
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_M200.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [18]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:46:30
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3286.747209787369

---

In [19]:
# Configuration for the run logged as "BW_M400": memory_size = 400, phi_length = 5.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 400
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_M400.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [20]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:33:10
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3252.5205240249634

---

In [21]:
# Configuration for the run logged as "BW_M800": memory_size = 800, phi_length = 5.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 800
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_M800.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [22]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:06:30
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3292.680137872696

---

In [23]:
# Configuration for the run logged as "BW_M1600": memory_size = 1600, phi_length = 5.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 1600
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_M1600.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [24]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 22:13:10
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3319.8053851127625

---

In [25]:
# Configuration for the run logged as "BW_P1": phi_length = 1, memory_size = 200.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 1
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 200
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_P1.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [26]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:46:38
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3283.5739748477936

---

In [27]:
# Configuration for the run logged as "BW_P5": phi_length = 5, memory_size = 200.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 200
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_P5.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [28]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:46:30
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3293.329514026642

---

In [29]:
# Configuration for the run logged as "BW_P25": phi_length = 25, memory_size = 200.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 25
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 200
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_P25.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [30]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:45:50
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3339.572259902954

---

In [31]:
# Configuration for the run logged as "BW_P125": phi_length = 125, memory_size = 200.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 20
reward_scaling_update = 'fixed'
phi_length = 125
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 200
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2330_BW_P125.log"
# Emulated time window: anchored at 2014-11-01 and spanning n_days*5 days.
time_step = pd.Timedelta(seconds=2)
n_days = 7
head_datetime = pd.to_datetime("2014-11-01 0:00:00")
tail_datetime = head_datetime + pd.Timedelta(days=n_days*5)
# Move the start earlier by num_buffer*memory_size + phi_length steps
# (presumably a warm-up so the replay buffers can fill before 2014-11-01 — confirm).
head_datetime -= time_step*(num_buffer*memory_size+phi_length)

In [32]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, head_datetime=head_datetime, tail_datetime=tail_datetime, time_step=time_step, verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-10-31 23:42:30
  tail=2014-12-06 00:00:00
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


3542.331981897354

---

In [37]:
# Configuration for the run logged as "TQ_AR1_3": adaptive reward scaling, memory_size = 2000.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
# NOTE(review): the log tag reads "AR1" while reward_scaling is 120 — confirm the tag matches this setting.
reward_scaling = 120
reward_scaling_update = 'adaptive'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 2000
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2335_TQ_AR1_3.log"
# Step length in seconds and number of days; the run cell combines them into its epoch budget.
step_sec = 2
n_days = 7

In [38]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, time_step=pd.Timedelta(seconds=step_sec), verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


2895.3313660621643

---

In [39]:
# Configuration for the run logged as "TQ_AR5_2": adaptive reward scaling with reward_scaling = 5, memory_size = 2000.
# Trace: keep only the sessions recorded at one location.
session_df_loc = session_df[session_df['location'] == '第三食堂']
# Agent settings (handed to QAgentNNPhi by the run cell that follows).
gamma = 0.5
alpha = 0.9
explore_strategy = 'epsilon'
epsilon = 0.02
reward_scaling = 5
reward_scaling_update = 'adaptive'
phi_length = 5
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2  # five 2-tuples, one per entry of a state slice
momentum = 0.9
learning_rate = 0.01
update_period = 16
num_buffer = 2
memory_size = 2000
batch_size = 100
freeze_period = 4
# Destination of the output captured during the run.
log_file = "sleep_control/experiments/log/message_2016-6-7_2335_TQ_AR5_2.log"
# Step length in seconds and number of days; the run cell combines them into its epoch budget.
step_sec = 2
n_days = 7

In [40]:
log_file = open(log_file,"w")
dim_state = (1, phi_length, 3+2)
actions = [(True, None), (False, 'serve_all')]
te = sleep_control.traffic_emulator.TrafficEmulator(session_df=session_df_loc, time_step=pd.Timedelta(seconds=step_sec), verbose=2)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)
agent = QAgentNNPhi(phi_length=phi_length, dim_state=dim_state, range_state=[[range_state_slice]*phi_length], actions=actions,
        alpha=alpha, learning_rate=learning_rate, momentum=momentum, update_period=update_period,
        reward_scaling=reward_scaling, reward_scaling_update=reward_scaling_update,
        memory_size=memory_size, num_buffer=num_buffer, freeze_period=freeze_period, batch_size=batch_size,
        gamma=gamma, explore_strategy=explore_strategy, epsilon=epsilon,
        verbose=2)
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)
t = time.time()
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*n_days/step_sec
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
log_file.close()
time.time()-t

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


2891.97047495842