In [5]:
import datetime
import json
import time

import numpy as np
import pandas as pd

from traffic_emulator import TrafficEmulator
pd.set_option('mode.chained_assignment', None)

In [2]:
# Setting up data
# Column names for the header-less session log, in file order.
SESSION_COLUMNS = ['uid', 'location', 'startTime_unix', 'duration_ms', 'domainProviders',
                   'domainTypes', 'domains', 'bytesByDomain', 'requestsByDomain']


def _sum_semicolon_field(field):
    """Return the sum of the ';'-separated numeric values held in the string `field`."""
    return sum(map(float, field.split(';')))


session_df = pd.read_csv(filepath_or_buffer='./data/net_traffic_nonull.dat', sep=',', names=SESSION_COLUMNS)
session_df.index.name = 'sessionID'
# Start time and duration are both in milliseconds, so the end time is their plain sum.
session_df['endTime_unix'] = session_df['startTime_unix'] + session_df['duration_ms']
session_df['startTime_datetime'] = pd.to_datetime(session_df['startTime_unix'], unit='ms')  # convert start time to readable datetimes
session_df['endTime_datetime'] = pd.to_datetime(session_df['endTime_unix'], unit='ms')
session_df['totalBytes'] = session_df['bytesByDomain'].map(_sum_semicolon_field)  # sum bytes across domains
session_df['totalRequests'] = session_df['requestsByDomain'].map(_sum_semicolon_field)  # sum requests across domains
# DataFrame.sort() is deprecated and later removed from pandas; sort_values() is its replacement.
# The frame must be in start-time order before the group-wise diffs below are taken.
session_df.sort_values('startTime_datetime', ascending=True, inplace=True)
# Gap between consecutive session starts within each location, as a timedelta and in milliseconds.
session_df['interArrivalDuration_datetime'] = session_df.groupby('location')['startTime_datetime'].diff()  # group-wise diff
session_df['interArrivalDuration_ms'] = session_df.groupby('location')['startTime_unix'].diff()  # group-wise diff



# Initialization

In [4]:
# Empty session_df
# Constructing the emulator without any session data must be rejected with ValueError.
print("=======Initialization: Empty session_df=======")
try:
    te = TrafficEmulator()  # should raise ValueError
except ValueError:
    pass  # expected outcome
else:
    # Without this branch the check passes silently even when no error is raised.
    raise AssertionError("TrafficEmulator() without a session_df should raise ValueError")

TrafficEmulator Initialization: session_df passed in is empty or None.


In [5]:
# Default values
# Build an emulator with only the session data and show the settings it ends up with.
print("=======Initialization: Default values=======")
te = TrafficEmulator(session_df)
for attr_name in ('time_step', 'head_datetime', 'tail_datetime', 'verbose'):
    print(getattr(te, attr_name))

0 days 00:00:01
2014-08-31 16:00:04.181000
2015-01-31 15:56:59.142000
0


In [6]:
# Verbose
# Per the recorded output, verbose=1 makes the constructor echo its parameters.
print("=======Initialization: Verbose=======")
emulator_options = {'verbose': 1}
te = TrafficEmulator(session_df, **emulator_options)

New TrafficEmulator with parameters:
  head=2014-08-31 16:00:04.181000
  tail=2015-01-31 15:56:59.142000
  time_step=0 days 00:00:01
  epoch=0
  verbose=1


In [7]:
# Head and tail datetime
# head is deliberately later than tail, so construction is expected to fail with ValueError.
print("=======Initialization: Head and tail datetime=======")
# pd.datetime is a deprecated alias of datetime.datetime (removed in pandas 1.0); use the stdlib class.
head = datetime.datetime(year=2014, month=9, day=5)
tail = datetime.datetime(year=2014, month=9, day=3)
try:
    te = TrafficEmulator(session_df, head_datetime=head, tail_datetime=tail, time_step=pd.Timedelta(days=0.5))
except ValueError:
    pass  # expected outcome
else:
    # Without this branch the check passes silently even when no error is raised.
    raise AssertionError("TrafficEmulator should raise ValueError when head_datetime > tail_datetime")

head_datetime > tail_datetime


# Traffic & Service

When running these tests, do not initialize the emulator with a time_step that is too large; this can greatly increase the duration of each epoch. E.g. 1h takes more than 10 seconds, while 1m takes less than 0.5 seconds.

In [8]:
# Head and tail range:
# Datetime range larger than dataset. should observe empty traffic at first, and warning in the end.
# NOTE(review): the recorded default head/tail (2014-08-31 .. 2015-01-31) enclose the range used
# here and the recorded output shows traffic in the first epoch, so the description above may be
# stale -- confirm.
print("=======Traffic & Service: datetime range=======")
# pd.datetime is a deprecated alias of datetime.datetime (removed in pandas 1.0); use the stdlib class.
head = datetime.datetime(year=2014, month=9, day=3)
tail = datetime.datetime(year=2014, month=9, day=7)
time_step = pd.Timedelta(minutes=1)
te = TrafficEmulator(session_df, head_datetime=head, tail_datetime=tail, time_step=time_step)
for i in range(10):
    epoch_start = time.time()  # wall-clock start, used to time one get_traffic/serve round
    print("{} to {}".format(head + i*time_step, head + (i+1)*time_step))
    t = te.get_traffic()
    if t is not None:
        print(t.index)
    # An empty service frame is passed on every epoch.
    reward = te.serve(service_df=pd.DataFrame())
    print("Reward = {} ({} seconds)".format(reward, time.time() - epoch_start))

2014-09-03 00:00:00 to 2014-09-03 00:01:00
Int64Index([10555461,  2278064,   200945,   506527,  6993484,  9100301,
            11265633, 12025586, 10737777,  1193058,  3073418,  5475326,
                8164,  7651556,  2640087,  1139381,  2060208,  6129834,
             1779168, 10254099,   652833,  5023934,  5332006, 10407231,
             4109241,  7114956,  1268995,  4106174,  7465552,  6258698,
             1278092,  8447569,  8636712,  9705634,  9858103,  8462789,
              820683,  8723848,  8911084, 11628841,  5136285,  8283938,
             1050940, 10923707,    26237, 10328305,  2333594,  5952047,
             2481911,   317046, 11351672,  5332595,  8327244,  9078196,
            10254100, 10689487],
           dtype='int64', name=u'sessionID')
Reward = 0 (0.202744960785 seconds)
2014-09-03 00:01:00 to 2014-09-03 00:02:00
Int64Index([10555461,   200945,   506527,  6993484, 11265633, 12025586,
            10737777,  1193058,  3073418,  5475326,     8164,  7651556,
        

In [9]:
# No service:
# Provide no service for all sessions. Should observe active session persist during its period.
# And reward = -1 * (# sent sessions)
# NOTE(review): the recorded first-epoch reward is -442.0 while 56 sessions are listed, so the
# penalty may be counted per request rather than per session -- confirm.
print("=======Traffic & Service: no service=======")
# pd.datetime is a deprecated alias of datetime.datetime (removed in pandas 1.0); use the stdlib class.
head = datetime.datetime(year=2014, month=9, day=3)
tail = datetime.datetime(year=2014, month=9, day=7)
time_step = pd.Timedelta(minutes=1)
te = TrafficEmulator(session_df, head_datetime=head, tail_datetime=tail, time_step=time_step, verbose=0)
for i in range(10):
    epoch_start = time.time()  # wall-clock start, used to time one epoch
    print("{} to {}".format(head + i*time_step, head + (i+1)*time_step))
    t = te.get_traffic()
    if t is not None:
        print(t.index)
    # One row per session in the traffic frame (none when there is no traffic); every row carries
    # an empty JSON object, i.e. no request is flagged as served.
    service_index = t.index if t is not None else pd.Index([])
    service_df = pd.DataFrame(columns=['reqServedFlag_per_domain'], index=service_index)
    service_df['reqServedFlag_per_domain'] = json.dumps({})
    print(te.serve(service_df=service_df))
    print("{} seconds".format(time.time() - epoch_start))

2014-09-03 00:00:00 to 2014-09-03 00:01:00
Int64Index([10555461,  2278064,   200945,   506527,  6993484,  9100301,
            11265633, 12025586, 10737777,  1193058,  3073418,  5475326,
                8164,  7651556,  2640087,  1139381,  2060208,  6129834,
             1779168, 10254099,   652833,  5023934,  5332006, 10407231,
             4109241,  7114956,  1268995,  4106174,  7465552,  6258698,
             1278092,  8447569,  8636712,  9705634,  9858103,  8462789,
              820683,  8723848,  8911084, 11628841,  5136285,  8283938,
             1050940, 10923707,    26237, 10328305,  2333594,  5952047,
             2481911,   317046, 11351672,  5332595,  8327244,  9078196,
            10254100, 10689487],
           dtype='int64', name=u'sessionID')
-442.0
0.245578050613 seconds
2014-09-03 00:01:00 to 2014-09-03 00:02:00
Int64Index([10555461,   200945,   506527,  6993484, 11265633, 12025586,
            10737777,  1193058,  3073418,  5475326,     8164,  7651556,
             2

In [10]:
# Full service
# Flag every request of every session as served and print the reward of each epoch.
print("=======Traffic & Service: full service=======")
# pd.datetime is a deprecated alias of datetime.datetime (removed in pandas 1.0); use the stdlib class.
head = datetime.datetime(year=2014, month=9, day=3)
tail = datetime.datetime(year=2014, month=9, day=7)
time_step = pd.Timedelta(minutes=1)
te = TrafficEmulator(session_df, head_datetime=head, tail_datetime=tail, time_step=time_step, verbose=0)
for i in range(10):
    epoch_start = time.time()  # wall-clock start, used to time one epoch
    print("{} to {}".format(head + i*time_step, head + (i+1)*time_step))
    t = te.get_traffic()
    if t is not None:
        print(t.index)
        # Build the whole flag column in one pass instead of assigning cell by cell with .loc,
        # which is slow and would inflate the timing printed below.
        served_flags = []
        for payload in t['bytesSent_per_request_per_domain']:
            bytes_by_domain = json.loads(payload)  # JSON object: domain -> list with one entry per request
            served_flags.append(json.dumps(
                {domain: [True] * len(requests) for domain, requests in bytes_by_domain.items()}))
        service_df = pd.DataFrame(
            {'reqServedFlag_per_domain': pd.Series(served_flags, index=t.index, dtype=object)})
    else:
        service_df = pd.DataFrame()
    print(te.serve(service_df=service_df))
    print("{} seconds".format(time.time() - epoch_start))

2014-09-03 00:00:00 to 2014-09-03 00:01:00
Int64Index([10555461,  2278064,   200945,   506527,  6993484,  9100301,
            11265633, 12025586, 10737777,  1193058,  3073418,  5475326,
                8164,  7651556,  2640087,  1139381,  2060208,  6129834,
             1779168, 10254099,   652833,  5023934,  5332006, 10407231,
             4109241,  7114956,  1268995,  4106174,  7465552,  6258698,
             1278092,  8447569,  8636712,  9705634,  9858103,  8462789,
              820683,  8723848,  8911084, 11628841,  5136285,  8283938,
             1050940, 10923707,    26237, 10328305,  2333594,  5952047,
             2481911,   317046, 11351672,  5332595,  8327244,  9078196,
            10254100, 10689487],
           dtype='int64', name=u'sessionID')
444.0
0.260341882706 seconds
2014-09-03 00:01:00 to 2014-09-03 00:02:00
Int64Index([10555461,   200945,   506527,  6993484, 11265633, 12025586,
            10737777,  1193058,  3073418,  5475326,     8164,  7651556,
             26

In [11]:
# Partial service
# Flag each request as served with probability 0.5 and print the reward of each epoch.
print("=======Traffic & Service: partial service=======")
# pd.datetime is a deprecated alias of datetime.datetime (removed in pandas 1.0); use the stdlib class.
head = datetime.datetime(year=2014, month=9, day=3)
tail = datetime.datetime(year=2014, month=9, day=7)
time_step = pd.Timedelta(minutes=1)
te = TrafficEmulator(session_df, head_datetime=head, tail_datetime=tail, time_step=time_step, verbose=0)
# Dedicated, seeded generator so the served/unserved flags are the same on every run
# without touching numpy's global random state.
rng = np.random.RandomState(0)
for i in range(10):
    epoch_start = time.time()  # wall-clock start, used to time one epoch
    print("{} to {}".format(head + i*time_step, head + (i+1)*time_step))
    t = te.get_traffic()
    if t is not None:
        print(t.index)
        # Build the whole flag column in one pass instead of assigning cell by cell with .loc,
        # which is slow and would inflate the timing printed below.
        served_flags = []
        for payload in t['bytesSent_per_request_per_domain']:
            bytes_by_domain = json.loads(payload)  # JSON object: domain -> list with one entry per request
            served_flags.append(json.dumps(
                {domain: (rng.rand(len(requests)) > 0.5).tolist()
                 for domain, requests in bytes_by_domain.items()}))
        service_df = pd.DataFrame(
            {'reqServedFlag_per_domain': pd.Series(served_flags, index=t.index, dtype=object)})
    else:
        service_df = pd.DataFrame()
    print(te.serve(service_df=service_df))
    print("{} seconds".format(time.time() - epoch_start))

2014-09-03 00:00:00 to 2014-09-03 00:01:00
Int64Index([10555461,  2278064,   200945,   506527,  6993484,  9100301,
            11265633, 12025586, 10737777,  1193058,  3073418,  5475326,
                8164,  7651556,  2640087,  1139381,  2060208,  6129834,
             1779168, 10254099,   652833,  5023934,  5332006, 10407231,
             4109241,  7114956,  1268995,  4106174,  7465552,  6258698,
             1278092,  8447569,  8636712,  9705634,  9858103,  8462789,
              820683,  8723848,  8911084, 11628841,  5136285,  8283938,
             1050940, 10923707,    26237, 10328305,  2333594,  5952047,
             2481911,   317046, 11351672,  5332595,  8327244,  9078196,
            10254100, 10689487],
           dtype='int64', name=u'sessionID')
8.0
0.258217096329 seconds
2014-09-03 00:01:00 to 2014-09-03 00:02:00
Int64Index([10555461,   200945,   506527,  6993484, 11265633, 12025586,
            10737777,  1193058,  3073418,  5475326,     8164,  7651556,
             2640