In [1]:
%cd ~/ipython-notebook/dqn4wirelesscontrol/
import sys
import time
import pandas as pd
pd.set_option('mode.chained_assignment', None)  # block warnings due to DataFrame value assignment
# Project modules
from sleep_control.integration import Emulation
import sleep_control.traffic_emulator
import sleep_control.traffic_server
from sleep_control.controller import QController, DummyController
import qlearning.qtable
import qlearning.qnn
from qlearning.qtable import QAgent
from qlearning.qnn import QAgentNN
from qlearning.mixin import PhiMixin

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, CuDNN 4007)


/home/admin-326/Data/ipython-notebook/dqn4wirelesscontrol


# Setting-up data

In [8]:
# Keep a handle on the notebook's stdout so the run cells can restore it
# after they redirect sys.stdout to a log file.
std_out = sys.stdout

In [2]:
# Load the raw session log (headerless CSV; column names supplied here) and
# derive per-session timing and volume columns.
session_df = pd.read_csv(filepath_or_buffer='./sleep_control/data/net_traffic_nonull.dat',
                         sep=',',
                         names=['uid','location','startTime_unix','duration_ms','domainProviders','domainTypes','domains','bytesByDomain','requestsByDomain'])
session_df.index.name = 'sessionID'
# End time = start time + duration (both columns are treated as milliseconds below).
session_df['endTime_unix'] = session_df['startTime_unix'] + session_df['duration_ms']
session_df['startTime_datetime'] = pd.to_datetime(session_df['startTime_unix'], unit='ms')  # convert start time to readable date_time strings
session_df['endTime_datetime'] = pd.to_datetime(session_df['endTime_unix'], unit='ms')
# The per-domain columns hold ';'-separated numbers; collapse each to a single total.
session_df['totalBytes'] = session_df['bytesByDomain'].apply(lambda x: x.split(';')).map(lambda x: sum(map(float, x)))  # sum bytes across domains
session_df['totalRequests'] = session_df['requestsByDomain'].apply(lambda x: x.split(';')).map(lambda x: sum(map(float, x)))  # sum requests across domains
# Sort chronologically BEFORE the group-wise diffs: diff() works on row order,
# so inter-arrival times are only meaningful on time-sorted rows.
# (DataFrame.sort was deprecated in pandas 0.17 and later removed; sort_values replaces it.)
session_df.sort_values(['startTime_datetime'], ascending=True, inplace=True)  # get it sorted
session_df['interArrivalDuration_datetime'] = session_df.groupby('location')['startTime_datetime'].diff()  # group-wise diff
session_df['interArrivalDuration_ms'] = session_df.groupby('location')['startTime_unix'].diff()  # group-wise diff



In [3]:
class QAgentNNPhi(PhiMixin, QAgentNN):
    """QAgentNN combined with PhiMixin; the agent class used by the experiment cells.

    PhiMixin is listed first, so its methods take precedence over QAgentNN's
    in the method resolution order.
    NOTE(review): presumably PhiMixin consumes the `phi_length` keyword that
    the experiment cells pass in -- confirm in qlearning.mixin.
    """
    def __init__(self, **kwargs):
        # Keyword-only construction; all arguments are forwarded along the MRO.
        super(QAgentNNPhi, self).__init__(**kwargs)

# Experiment 1 phi=1

In [10]:
# Restrict the data to the sessions of a single location ('第三食堂', i.e. "Third Canteen").
session_df_loc = session_df.loc[session_df['location'] == '第三食堂']

In [20]:
# Experiment 1 setup: phi_length=1, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 1
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [21]:
# %%prun -T prun_log_9.txt -D prun_log_9.stat  # for code profiling
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-2_2254.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2577.39277101


# Experiment 2 phi=10

In [22]:
# Experiment 2 setup: phi_length=10, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [23]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-2_2340.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2580.11469007


# Experiment 3 phi=100

In [24]:
# Experiment 3 setup: phi_length=100, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 100
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [25]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0020.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2627.48975396


# Experiment 4 phi=5

In [26]:
# Experiment 4 setup: phi_length=5, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 5
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [27]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0100.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2566.45933318


# Experiment 5 phi=5

In [28]:
# Experiment 5 setup: phi_length=5, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 5
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [29]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0140.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2576.59158397


# Experiment 6 phi=5

In [30]:
# Experiment 6 setup: phi_length=5, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 5
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [31]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0220.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2573.10563207


# Experiment 7 phi=50

In [32]:
# Experiment 7 setup: phi_length=50, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 50
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [33]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0300.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2598.19625807


# Experiment 8 phi=50

In [34]:
# Experiment 8 setup: phi_length=50, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 50
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [35]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0340.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2596.56152797


# Experiment 9 phi=50

In [36]:
# Experiment 9 setup: phi_length=50, reward_scaling_update='fixed', num_buffer=2, epsilon=0.02.
phi_length = 50
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [37]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0420.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2635.18577003


# Experiment 10 adaptive

In [40]:
# Experiment 10 setup: phi_length=10, reward_scaling_update='adaptive', num_buffer=2, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='adaptive',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [41]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0500.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2588.32669091


# Experiment 11 adaptive

In [42]:
# Experiment 11 setup: phi_length=10, reward_scaling_update='adaptive', num_buffer=2, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='adaptive',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [43]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0540.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2586.32308507


# Experiment 12 adaptive

In [44]:
# Experiment 12 setup: phi_length=10, reward_scaling_update='adaptive', num_buffer=2, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='adaptive',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [45]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0620.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2587.63884783


# Experiment 13 1buffer

In [46]:
# Experiment 13 setup: phi_length=10, reward_scaling_update='fixed', num_buffer=1, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=1,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [47]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0700.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2625.46522808


# Experiment 14 1buffer

In [48]:
# Experiment 14 setup: phi_length=10, reward_scaling_update='fixed', num_buffer=1, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=1,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [49]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0740.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2589.81189013


# Experiment 15 1buffer

In [50]:
# Experiment 15 setup: phi_length=10, reward_scaling_update='fixed', num_buffer=1, epsilon=0.02.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=1,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.02,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [51]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0800.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2586.55686307


# Experiment 16 epsilon

In [52]:
# Experiment 16 setup: phi_length=10, reward_scaling_update='fixed', num_buffer=2, epsilon=0.2.
phi_length = 10
actions = [(True, None), (False, 'serve_all')]
# (low, high) bounds for one state slice: three entries in [0, 10], then two in [0, 1].
range_state_slice = [(0, 10)]*3 + [(0, 1)]*2

# TrafficEmulator over the filtered sessions with a 2-second time step, plus a TrafficServer.
te = sleep_control.traffic_emulator.TrafficEmulator(
    session_df=session_df_loc,
    time_step=pd.Timedelta(seconds=2),
    verbose=2,
)
ts = sleep_control.traffic_server.TrafficServer(cost=5, verbose=2)

# Agent hyper-parameters, one per line so experiments are easy to diff.
agent = QAgentNNPhi(
    phi_length=phi_length,
    dim_state=(1, phi_length, 3+2),
    range_state=[[range_state_slice]*phi_length],
    actions=actions,
    learning_rate=0.01,
    reward_scaling=10,
    reward_scaling_update='fixed',
    batch_size=100,
    freeze_period=50,
    memory_size=200,
    num_buffer=2,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.2,
    verbose=2,
)

# Wire controller and emulation together; `emu` is the object the run cell steps.
c = QController(agent=agent)
emu = sleep_control.integration.Emulation(te=te, ts=ts, c=c)

New TrafficEmulator with parameters:
  head=2014-09-25 09:20:31.474000
  tail=2015-01-31 15:36:36.279000
  time_step=0 days 00:00:02
  epoch=0
  verbose=2


In [53]:
t = time.time()
log_file = open("sleep_control/experiments/log/message_2016-6-3_0840.log","w")
sys.stdout = log_file
TOTAL_EPOCHS = 3600*24*7/2  # 7 days
while emu.epoch is not None and emu.epoch<TOTAL_EPOCHS:
    # log time
    print "Epoch {}, ".format(emu.epoch),
    left = emu.te.head_datetime + emu.te.epoch*emu.te.time_step
    right = left + emu.te.time_step
    print "{} - {}".format(left.strftime("%Y-%m-%d %H:%M:%S"), right.strftime("%Y-%m-%d %H:%M:%S"))
    emu.step()
    print
sys.stdout = std_out
print time.time()-t

2567.92436004


# Reloading

In [7]:
# Re-execute edited project modules without restarting the kernel (Python 2 builtin `reload`).
# NOTE: names bound earlier with `from ... import ...` (Emulation, QController,
# DummyController, QAgent, QAgentNN, PhiMixin) and the QAgentNNPhi class keep
# referring to the pre-reload objects; re-run the import and class-definition
# cells afterwards to pick up the changes.
# sleep_control.controller and qlearning.mixin are not reloaded here.
reload(sleep_control.integration)
reload(sleep_control.traffic_server)
reload(sleep_control.traffic_emulator)
# `qlearning` is already bound by the `import qlearning.qtable` in the first cell;
# this re-import only makes the cell self-contained.
import qlearning
reload(qlearning.qnn)
# Last expression of the cell: its return value (the module) is what the notebook displays.
reload(qlearning.qtable)

<module 'qlearning.qtable' from 'qlearning/qtable.pyc'>