Prerequisites:
* Zonal: set up the cluster with the zonal configuration (the normal setup).
* Regional: set up the cluster with the regional configuration (the steps to build it are [here](https://drive.google.com/file/d/1iLjm2guC_7SAJwefXV5NBVMWnsKMqMNM/view?usp=sharing)).

In [None]:
import pandas as pd
import numpy as np
import json, re
from pandas import json_normalize
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import gaussian_kde

# Mount Google Drive at /content/drive (requires the Colab runtime, which
# provides the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd /content/drive/My Drive/Colab Notebooks/bcgossip/sim/gpbc/cnsim_plosone/geographical

/content/drive/My Drive/Colab Notebooks/bcgossip/sim/gpbc/cnsim_plosone/geographical


In [None]:
# Zonal run: propagation-time statistics per cluster size
data = pd.read_csv('test-zonal-10X.csv')

# Messages whose name ends in "-0" belong to the convergence phase; drop them
is_convergence = data['message'].str.endswith("-0")
data = data[~is_convergence]

# The node count is the run of digits between "cubaan" and the following "-"
data['num_nodes'] = data['message'].str.extract(r'cubaan(\d+)-', expand=False).astype(int)

# Split the log by event type
event_type = data['event_type']
initiate_data = data[event_type == 'initiate']
received_data = data[event_type == 'received']

# Per message: the latest 'received' timestamp and the first-listed 'initiate' timestamp
max_received_timestamps = received_data.groupby('message')['received_timestamp'].agg('max')
initiate_timestamps = initiate_data.groupby('message')['received_timestamp'].agg('first')

# Propagation time = last receipt - initiation. Both Series are indexed by
# message, so the subtraction aligns on it; dividing by 1e6 converts
# nanoseconds to milliseconds.
propagation_times_ms = max_received_timestamps.sub(initiate_timestamps).div(1e6)
prop = propagation_times_ms.rename('propagation_time_ms').reset_index()

# Recover the node count for each message from its name
prop['num_nodes'] = prop['message'].str.extract(r'cubaan(\d+)', expand=False).astype(int)

# Output column label -> aggregation applied to the propagation times (ms)
summary_columns = {
    'mean (ms)': 'mean',
    'min (ms)': 'min',
    'median (ms)': 'median',
    'max (ms)': 'max',
}

# Aggregate per node count, round to 2 decimals for readability, and turn
# 'num_nodes' back into a regular column
result_zonal = (
    prop.groupby('num_nodes')['propagation_time_ms']
    .agg(**summary_columns)
    .round(2)
    .reset_index()
)
result_zonal

Unnamed: 0,num_nodes,mean (ms),min (ms),median (ms),max (ms)
0,10,76.26,37.53,75.1,118.87
1,50,476.99,440.91,471.0,520.6
2,100,1052.15,958.08,1052.61,1144.5
3,200,2045.8,1864.91,2060.67,2173.12
4,400,4248.99,4043.1,4218.83,4546.74
5,600,6162.75,5516.64,6249.0,6758.07


In [None]:
# Regional run: propagation-time statistics per cluster size
data = pd.read_csv('test-regional-10X.csv')

# Messages whose name ends in "-0" belong to the convergence phase; drop them
is_convergence = data['message'].str.endswith("-0")
data = data[~is_convergence]

# The node count is the run of digits between "cubaan" and the following "-"
data['num_nodes'] = data['message'].str.extract(r'cubaan(\d+)-', expand=False).astype(int)

# Split the log by event type
event_type = data['event_type']
initiate_data = data[event_type == 'initiate']
received_data = data[event_type == 'received']

# Per message: the latest 'received' timestamp and the first-listed 'initiate' timestamp
max_received_timestamps = received_data.groupby('message')['received_timestamp'].agg('max')
initiate_timestamps = initiate_data.groupby('message')['received_timestamp'].agg('first')

# Propagation time = last receipt - initiation. Both Series are indexed by
# message, so the subtraction aligns on it; dividing by 1e6 converts
# nanoseconds to milliseconds.
propagation_times_ms = max_received_timestamps.sub(initiate_timestamps).div(1e6)
prop = propagation_times_ms.rename('propagation_time_ms').reset_index()

# Recover the node count for each message from its name
prop['num_nodes'] = prop['message'].str.extract(r'cubaan(\d+)', expand=False).astype(int)

# Output column label -> aggregation applied to the propagation times (ms)
summary_columns = {
    'mean (ms)': 'mean',
    'min (ms)': 'min',
    'median (ms)': 'median',
    'max (ms)': 'max',
}

# Aggregate per node count, round to 2 decimals for readability, and turn
# 'num_nodes' back into a regular column
result_regional = (
    prop.groupby('num_nodes')['propagation_time_ms']
    .agg(**summary_columns)
    .round(2)
    .reset_index()
)
result_regional

Unnamed: 0,num_nodes,mean (ms),min (ms),median (ms),max (ms)
0,10,93.47,49.83,88.96,158.41
1,50,511.43,278.08,530.91,612.61
2,100,1049.79,594.45,1083.98,1185.0
3,200,1941.29,1020.91,2016.2,2201.23
4,400,4356.01,4105.83,4269.05,4980.37
5,600,6363.11,5935.62,6318.89,6797.19


In [None]:
# Side-by-side table of the mean propagation time per cluster size.
# Label each mean column after its configuration before joining.
zonal_stats = result_zonal.rename(columns={'mean (ms)': 'Zonal'})
regional_stats = result_regional.rename(columns={'mean (ms)': 'Regional'})

# Outer join on the node count keeps sizes present in only one of the runs
comparison = zonal_stats.merge(regional_stats, on='num_nodes', how='outer')

# Retain just the node count and the two mean columns
comparison = comparison[['num_nodes', 'Zonal', 'Regional']]

# Last expression of the cell: renders the comparison table
comparison

Unnamed: 0,num_nodes,Zonal,Regional
0,10,76.26,93.47
1,50,476.99,511.43
2,100,1052.15,1049.79
3,200,2045.8,1941.29
4,400,4248.99,4356.01
5,600,6162.75,6363.11


In [None]:
# Visualization
# Give the node-count column a presentation-friendly label.
# Note: this renames the column on `comparison` itself (in place).
comparison.rename(columns={'num_nodes': 'Total Nodes'}, inplace=True)

# Line chart comparing the two configurations
fig = go.Figure()

# One line per configuration: (source column, line colour, legend label)
trace_specs = [
    ('Zonal', 'red', 'Single Zone'),
    ('Regional', 'blue', 'Multi Zones'),
]
for column, colour, label in trace_specs:
    fig.add_trace(go.Scatter(
        x=comparison['Total Nodes'],
        y=comparison[column],
        mode='lines',
        line=dict(color=colour, width=2),
        name=label,
    ))

# Both axes share the same title and tick font sizes
axis_fonts = {'title_font': {'size': 18}, 'tickfont': {'size': 14}}

# Titles, legend, and a fixed 800x600 canvas
fig.update_layout(
    xaxis_title='Total number of nodes',
    yaxis_title='Propagation Time (milliseconds)',
    showlegend=True,
    legend_title_text='Configurations',
    xaxis=dict(axis_fonts),
    yaxis=dict(axis_fonts),
    autosize=False,
    width=800,
    height=600,
    legend={'font': {'size': 14}},
)

# Render the figure
fig.show()