In this notebook we use the output of tcpdump to compute the amount of traffic generated during each of the experiments.

In [1]:
import pandas as pd

In [2]:
# Input logs derived from tcpdump output, one file per traffic direction
# (sent / received).
sent_path = 'results/network_traffic/sent_cropped.txt'
received_path = 'results/network_traffic/received_cropped.txt'

In [3]:
# Load both space-separated logs with identical reader settings.
# NOTE(review): no `header=`/`names=` is passed, so pandas uses the first line
# of each file as column labels (the labels are overwritten in a later cell).
# Confirm the files really begin with a header line; otherwise the first
# record of each file is consumed as labels and never counted.
sent_df, received_df = (
    pd.read_csv(log_path, sep=' ') for log_path in (sent_path, received_path)
)

In [50]:
# Group both frames so the same clean-up steps can be applied to each in a loop.
dfs = (sent_df, received_df)

In [10]:
# Give both frames the same two column labels, then parse the time column
# into pandas datetimes. The frames are modified in place, so `sent_df` and
# `received_df` see the change.
column_labels = ['time', 'num_bytes']
for frame in dfs:
    frame.columns = column_labels
    frame['time'] = pd.to_datetime(frame['time'])

In [24]:
# Inspect the raw byte-count column: the values are still text (dtype object)
# and carry a trailing ':' that has to be removed before numeric conversion.
sent_df['num_bytes']

0            66:
1            66:
2           226:
3            90:
4            66:
            ... 
59298434    226:
59298435     82:
59298436     66:
59298437     66:
59298438     66:
Name: num_bytes, Length: 59298439, dtype: object

In [13]:
# Cache the sent frame on disk so it can be reloaded with pd.read_pickle
# instead of re-parsing the text log.
sent_df.to_pickle('results/network_traffic/sent_df')

In [14]:
# Cache the received frame on disk so it can be reloaded with pd.read_pickle
# instead of re-parsing the text log.
received_df.to_pickle('results/network_traffic/received_df')

In [62]:
# Reload both frames from their pickle caches.
# NOTE(review): the same two cache paths are written again by later cells at
# different processing stages, so the columns loaded here depend on which
# save ran last.
sent_df, received_df = (
    pd.read_pickle(cache_path)
    for cache_path in (
        'results/network_traffic/sent_df',
        'results/network_traffic/received_df',
    )
)

In [63]:
# Byte counts are read as text with a trailing ':' (e.g. '66:'). Strip that
# colon explicitly and convert to the smallest integer dtype that fits.
# Stripping ':' rather than blindly dropping the last character keeps this
# cell safe to re-run: an already-converted value such as 66 stays 66 instead
# of being truncated to 6. Unparseable entries become NaN (errors='coerce').
sent_df['num_bytes'] = pd.to_numeric(
    sent_df['num_bytes'].map(lambda raw: str(raw).rstrip(':')),
    downcast='integer',
    errors='coerce',
)

In [66]:
# Byte counts are read as text with a trailing ':' (e.g. '66:'). Strip that
# colon explicitly and convert to the smallest integer dtype that fits.
# Stripping ':' rather than blindly dropping the last character keeps this
# cell safe to re-run: an already-converted value such as 66 stays 66 instead
# of being truncated to 6. Unparseable entries become NaN (errors='coerce').
received_df['num_bytes'] = pd.to_numeric(
    received_df['num_bytes'].map(lambda raw: str(raw).rstrip(':')),
    downcast='integer',
    errors='coerce',
)

In [67]:
# Count missing values per column; a non-zero num_bytes count would mean the
# numeric conversion coerced some entries to NaN.
received_df.isna().sum()

time         0
num_bytes    0
dtype: int64

In [65]:
# Count missing values per column; a non-zero num_bytes count would mean the
# numeric conversion coerced some entries to NaN.
sent_df.isna().sum()

time         0
num_bytes    0
dtype: int64

In [70]:
# Overwrite both pickle caches with the frames as they are at this point.
for frame, cache_path in (
    (sent_df, 'results/network_traffic/sent_df'),
    (received_df, 'results/network_traffic/received_df'),
):
    frame.to_pickle(cache_path)

In [101]:
# For every row, store the time until the following row's timestamp (the last
# row has no successor and gets NaT). The column holds Timedelta values here;
# it is converted to float seconds in a later cell.
sent_df['seconds_to_next'] = pd.to_timedelta(
    sent_df['time'].shift(-1).sub(sent_df['time'])
)
# The absolute timestamps are no longer needed once the gaps are known.
# Note: this rebinding means the cell cannot be re-run without reloading.
sent_df = sent_df.drop('time', axis=1)

In [105]:
# For every row, store the time until the following row's timestamp (the last
# row has no successor and gets NaT). The column holds Timedelta values here;
# it is converted to float seconds in a later cell.
received_df['seconds_to_next'] = pd.to_timedelta(
    received_df['time'].shift(-1).sub(received_df['time'])
)
# The absolute timestamps are no longer needed once the gaps are known.
# Note: this rebinding means the cell cannot be re-run without reloading.
received_df = received_df.drop('time', axis=1)

In [106]:
# Convert the Timedelta gaps to float seconds. The vectorized `.dt` accessor
# replaces a per-element Python lambda, which is needlessly slow on frames
# with tens of millions of rows; missing gaps (NaT) become NaN either way.
sent_df['seconds_to_next'] = sent_df['seconds_to_next'].dt.total_seconds()
received_df['seconds_to_next'] = received_df['seconds_to_next'].dt.total_seconds()

In [108]:
# Preview the first rows: byte count plus gap (in seconds) to the next row.
received_df.head()

Unnamed: 0,num_bytes,seconds_to_next
0,66,0.000192
1,289,0.000281
2,217,0.029285
3,66,0.000125
4,66,0.001321


In [109]:
# Overwrite both pickle caches with the frames as they are at this point
# (num_bytes plus seconds_to_next; the time column has been dropped).
for frame, cache_path in (
    (sent_df, 'results/network_traffic/sent_df'),
    (received_df, 'results/network_traffic/received_df'),
):
    frame.to_pickle(cache_path)

In [114]:
# Number of rows that are followed by a silence of more than 29 seconds.
sent_df['seconds_to_next'].gt(29).sum()

36

In [115]:
# Number of rows that are followed by a silence of more than 29 seconds.
received_df['seconds_to_next'].gt(29).sum()

36

In [121]:
# Index of every row that is followed by a gap longer than 29 seconds; each
# such row is the last row before a long pause in that direction's traffic.
sent_cutoffs = sent_df.index[sent_df['seconds_to_next'].gt(29)].tolist()
received_cutoffs = received_df.index[received_df['seconds_to_next'].gt(29)].tolist()

In [128]:
# The last index
sent_cutoffs.append(sent_df.index.stop - 1)
received_cutoffs.append(received_df.index.stop - 1)

In [131]:
# Total bytes per segment. Each cutoff is the index of the LAST row of a
# segment, so a segment covers rows start..end inclusive.
sent_sums = []
start = 0
for end in sent_cutoffs:
    sent_sums.append(sent_df['num_bytes'].iloc[start:end + 1].sum())
    # The next segment begins on the row after the cutoff. Restarting at
    # `end` itself would count the cutoff row in two consecutive sums.
    start = end + 1

In [133]:
# Total bytes per segment. Each cutoff is the index of the LAST row of a
# segment, so a segment covers rows start..end inclusive.
received_sums = []
start = 0
for end in received_cutoffs:
    received_sums.append(received_df['num_bytes'].iloc[start:end + 1].sum())
    # The next segment begins on the row after the cutoff. Restarting at
    # `end` itself would count the cutoff row in two consecutive sums.
    start = end + 1

In [137]:
# Write one sum per line. A one-column frame never emits a field separator,
# so the default separator is used rather than passing a newline as `sep`
# (a newline is the record terminator, not a valid field delimiter).
pd.DataFrame(data={"_": sent_sums}).to_csv("results/network_traffic/sent_summed.csv", index=False, header=False)

In [138]:
# Write one sum per line. A one-column frame never emits a field separator,
# so the default separator is used rather than passing a newline as `sep`
# (a newline is the record terminator, not a valid field delimiter).
pd.DataFrame(data={"_": received_sums}).to_csv("results/network_traffic/received_summed.csv", index=False, header=False)

In [139]:
# Discard the first segment's total from both lists.
# NOTE(review): presumably the first segment precedes the first experiment —
# confirm. Re-running this cell drops one more entry each time.
sent_sums, received_sums = sent_sums[1:], received_sums[1:]

We create one file for each experiment, to match the scheme of the runtime data.

In [141]:
from experiment_params import d_list, m_list, n_log_list, private_list
import itertools

In [145]:
# Write one '<prefix>sent.txt' / '<prefix>received.txt' pair per parameter
# combination. Combinations are paired with the sums purely by position, in
# the iteration order of itertools.product.
param_combos = list(itertools.product(d_list, m_list, private_list, n_log_list))

# Fail before touching the filesystem if there are too few sums; otherwise
# the loop would stop part-way with an IndexError and leave a partial,
# misleading set of result files behind.
if len(sent_sums) < len(param_combos) or len(received_sums) < len(param_combos):
    raise ValueError(
        'expected at least {} sums per direction, got {} sent and {} received'.format(
            len(param_combos), len(sent_sums), len(received_sums)
        )
    )

for i, (d, m, private, n_log) in enumerate(param_combos):
    base_path = 'results/network_traffic/d-{}_m-{}_n-{}_{}_'.format(d, m, n_log, private)
    with open(base_path + 'sent.txt', 'w') as sent_summed_file:
        sent_summed_file.write(str(sent_sums[i]))
    with open(base_path + 'received.txt', 'w') as received_summed_file:
        received_summed_file.write(str(received_sums[i]))