In [1]:
import pandas as pd
import numpy as np
import json
import re
from scipy.stats import gamma

In [2]:
# Number of topology configs to scan (files nw-0.json ... nw-<NUM_TOPOS - 1>.json).
NUM_TOPOS = 100

# Upper bound on the number of latency samples fed into a gamma fit, and the
# seed used when a larger sample is randomly reduced to that bound.
MAX_SAMPLES = 10_000
SEED = 1234

# Input locations, given relative to the notebook's working directory.
LATENCY_FILES_DIR = '../../latency-files'
TOPO_CONFIG_DIR = '../../topo-configs'
GAMMA_PARAMS_FILE = '../../hvnet-gamma-parameters/gamma-params.json'

# Collects one {'min', 'max', 'mean', 'std'} dict per parameter name; the
# cells below fill it in.
min_max_params = dict()

In [3]:
# Flow rates and gamma params
# Read the gamma-parameter JSON; after the transpose every top-level key
# becomes one row and reset_index() moves that key into the first column.
with open(GAMMA_PARAMS_FILE) as params_file:
    gamma_params = json.load(params_file)

df = pd.DataFrame(gamma_params).T.reset_index()
df.columns = ['rate', 'shape', 'scale']  # columns are renamed positionally
# The rate comes from the JSON key: cast it to float and scale to kbit/s.
df['rate'] = df['rate'].astype(float) / 125

# Same four summary statistics for each of the three columns.
# NOTE(review): pandas' Series.std() defaults to the sample standard deviation
# (ddof=1), while the numpy-based cells of this notebook use ndarray.std()
# (ddof=0) -- confirm the mismatch is acceptable.
stat_targets = (
    ('flow_rate', 'rate'),
    ('flow_gamma_shape', 'shape'),
    ('flow_gamma_scale', 'scale'),
)
for param_name, column in stat_targets:
    values = df[column]
    min_max_params[param_name] = {
        'min': values.min(),
        'max': values.max(),
        'mean': values.mean(),
        'std': values.std()
    }

In [4]:
# HVNet link rate
# Gather the configured rate of every link over all topology configs and
# record its summary statistics.
hvnet_link_rates = []
for i in range(NUM_TOPOS):
    try:
        with open(f'{TOPO_CONFIG_DIR}/nw-{i}.json') as f:
            topo_config = json.load(f)
    except FileNotFoundError:
        # The latency-feature loop later in this notebook skips topologies
        # whose files are missing; do the same here so a gap in the numbering
        # does not abort the notebook before that loop is reached.
        print(f'config for topo {i} is missing, skipping...')
        continue
    hvnet_link_rates.extend(link["rate"] for link in topo_config["link"])

if not hvnet_link_rates:
    # Fail with a clear message instead of numpy's "zero-size array" error.
    raise ValueError(f'no link rates found under {TOPO_CONFIG_DIR}')

hvnet_link_rates = np.array(hvnet_link_rates)

# ndarray.std() uses numpy's default ddof=0 (population standard deviation).
min_max_params['hvnet_link_rate'] = {
    'min': hvnet_link_rates.min(),
    'max': hvnet_link_rates.max(),
    'mean': hvnet_link_rates.mean(),
    'std': hvnet_link_rates.std()
}

In [5]:
# OMNeT++ link rate (hardcoded to 1000)
# Nothing is read from disk here; the statistics are written out by hand.
# NOTE(review): 'min' is 0.0 rather than 1000.0 -- presumably so that
# max - min stays non-zero for min-max scaling; confirm.
min_max_params['omnet_link_rate'] = dict(
    min=0.0,
    max=1000.0,
    mean=1000.0,
    std=0.0,
)

In [6]:
# HVNet and OMNeT++ flow latency (mean, min, max, ...)
# Names of the per-flow latency features, in the order they are processed:
# basic statistics, the two gamma-fit parameters, percentiles p5..p95 in
# steps of 5, and finally the high tail percentiles.
FEATURES = [
    'mean', 'min', 'max', 'gamma_shape', 'gamma_scale',
    *(f'p{pct}' for pct in range(5, 100, 5)),
    'p99', 'p99.9', 'p99.99', 'p99.999', 'p99.9999',
]
    
def calc_feature(data, feature):
    """Compute a single summary feature of a latency sample.

    Parameters
    ----------
    data : pandas.Series
        The samples; the function calls ``quantile``, ``mean``, ``min``,
        ``max`` and ``sample`` on it.
    feature : str
        One of ``'mean'``, ``'min'``, ``'max'``, ``'gamma_shape'``, or a
        percentile name of the exact form ``'p<number>'`` such as ``'p95'``
        or ``'p99.9'``.

    Returns
    -------
    The requested statistic. For ``'gamma_shape'`` a ``(shape, scale)`` tuple
    is returned, i.e. one call yields both gamma parameters.

    Raises
    ------
    ValueError
        If ``feature`` is not one of the names above. ``'gamma_scale'`` is
        not accepted on its own because it is produced by the
        ``'gamma_shape'`` call.
    """
    # fullmatch: only an exact 'p<number>' is a percentile. An unanchored
    # search would also accept names that merely contain such a fragment
    # (e.g. 'xp50') and silently return a quantile for them.
    match = re.fullmatch(r'p(\d+(?:\.\d+)?)', feature)
    if match:
        quantile = float(match.group(1)) / 100
        return data.quantile(quantile)

    if feature == 'mean':
        return data.mean()
    if feature == 'min':
        return data.min()
    if feature == 'max':
        return data.max()
    if feature == 'gamma_shape':  # this covers both shape and scale
        # Fit on a seeded random subsample when the sample exceeds the cap.
        if len(data) > MAX_SAMPLES:
            data = data.sample(n=MAX_SAMPLES, random_state=SEED)
        shape, _, scale = gamma.fit(data, floc=0)  # fix the loc param to 0
        return shape, scale

    raise ValueError(f'Invalid feature: {feature}')

# One empty list per feature for each simulator; the per-flow loop appends
# one value per flow to these lists.
latencies = {
    simulator: {feature: [] for feature in FEATURES}
    for simulator in ('hvnet', 'omnet')
}

# For every topology: load its config and both latency CSVs, then append one
# value per feature and flow to `latencies`. A topology is skipped as a whole
# when any of its three files is missing.
for i in range(NUM_TOPOS):
    try:
        print(f'loading config for topo {i}...', end='')
        with open(f'{TOPO_CONFIG_DIR}/nw-{i}.json') as config_file:
            topo_config = json.load(config_file)
        print('done.')

        print(f'loading HVNet latencies for topo {i}...', end='')
        df_latency_hvnet = pd.read_csv(f'{LATENCY_FILES_DIR}/latencies-hvnet/{i:02d}-latencies-hvnet-preprocessed.csv')
        print('done.')

        print(f'loading OMNeT++ latencies for topo {i}...', end='')
        df_latency_omnet = pd.read_csv(f'{LATENCY_FILES_DIR}/latencies-omnet/{i:02d}-latencies-omnet-preprocessed.csv')
        print('done.')

        # HVNet is handled before OMNeT++ for each flow.
        frames_by_simulator = (
            ('hvnet', df_latency_hvnet),
            ('omnet', df_latency_omnet),
        )
        for flow in topo_config['flow']:
            # The flow number is the flow name without its first character.
            flow_num = int(flow['name'][1:])

            for simulator, frame in frames_by_simulator:
                samples = frame[frame.flow_num == flow_num].latency_us
                collected = latencies[simulator]
                for feature in FEATURES:
                    if feature == 'gamma_scale':
                        # Already stored by the 'gamma_shape' step.
                        continue
                    result = calc_feature(samples, feature)
                    if feature == 'gamma_shape':
                        # One fit yields both gamma parameters.
                        shape, scale = result
                        collected['gamma_shape'].append(shape)
                        collected['gamma_scale'].append(scale)
                    else:
                        collected[feature].append(result)

    except FileNotFoundError:
        print(f'\nsome data for topo {i} is missing, skipping...')

# Turn every collected feature list into a numpy array (stored back into
# `latencies`) and record its min / max / mean / std. ndarray.std() uses
# numpy's default ddof=0.
for feature in FEATURES:
    targets = (
        ('hvnet', f'flow_latency_hvnet_{feature}'),
        ('omnet', f'flow_latency_omnet_{feature}'),
    )
    for simulator, param_key in targets:
        values = np.array(latencies[simulator][feature])
        latencies[simulator][feature] = values
        min_max_params[param_key] = {
            'min': values.min(),
            'max': values.max(),
            'mean': values.mean(),
            'std': values.std()
        }

loading config for topo 0...done.
loading HVNet latencies for topo 0...done.
loading OMNeT++ latencies for topo 0...done.
loading config for topo 1...done.
loading HVNet latencies for topo 1...done.
loading OMNeT++ latencies for topo 1...done.
loading config for topo 2...done.
loading HVNet latencies for topo 2...done.
loading OMNeT++ latencies for topo 2...done.
loading config for topo 3...done.
loading HVNet latencies for topo 3...done.
loading OMNeT++ latencies for topo 3...done.
loading config for topo 4...done.
loading HVNet latencies for topo 4...done.
loading OMNeT++ latencies for topo 4...done.
loading config for topo 5...done.
loading HVNet latencies for topo 5...done.
loading OMNeT++ latencies for topo 5...done.
loading config for topo 6...done.
loading HVNet latencies for topo 6...done.
loading OMNeT++ latencies for topo 6...done.
loading config for topo 7...done.
loading HVNet latencies for topo 7...done.
loading OMNeT++ latencies for topo 7...done.
loading config for topo 

loading OMNeT++ latencies for topo 65...done.
loading config for topo 66...done.
loading HVNet latencies for topo 66...done.
loading OMNeT++ latencies for topo 66...done.
loading config for topo 67...done.
loading HVNet latencies for topo 67...done.
loading OMNeT++ latencies for topo 67...done.
loading config for topo 68...done.
loading HVNet latencies for topo 68...done.
loading OMNeT++ latencies for topo 68...done.
loading config for topo 69...done.
loading HVNet latencies for topo 69...done.
loading OMNeT++ latencies for topo 69...done.
loading config for topo 70...done.
loading HVNet latencies for topo 70...done.
loading OMNeT++ latencies for topo 70...done.
loading config for topo 71...done.
loading HVNet latencies for topo 71...done.
loading OMNeT++ latencies for topo 71...done.
loading config for topo 72...done.
loading HVNet latencies for topo 72...done.
loading OMNeT++ latencies for topo 72...done.
loading config for topo 73...done.
loading HVNet latencies for topo 73...done.


In [7]:
# Show the collected statistics as pretty-printed JSON.
min_max_params_json = json.dumps(min_max_params, indent=4)
print(min_max_params_json)

{
    "flow_rate": {
        "min": 1000.0,
        "max": 831162.0,
        "mean": 102175.78169542385,
        "std": 112323.34995486778
    },
    "flow_gamma_shape": {
        "min": 0.5325625213779135,
        "max": 1.332074126203368,
        "mean": 1.096132224296692,
        "std": 0.08366515992073066
    },
    "flow_gamma_scale": {
        "min": 3.016075680917091e-06,
        "max": 0.003317930326235959,
        "mean": 0.00018517353638414607,
        "std": 0.00045421251453550443
    },
    "hvnet_link_rate": {
        "min": 413.04347826086956,
        "max": 1900.0,
        "mean": 671.8528995756718,
        "std": 195.02488880728907
    },
    "omnet_link_rate": {
        "min": 0.0,
        "max": 1000.0,
        "mean": 1000.0,
        "std": 0.0
    },
    "flow_latency_hvnet_mean": {
        "min": 31.001229748590692,
        "max": 14160.564920922061,
        "mean": 205.20020575132045,
        "std": 515.4280191835654
    },
    "flow_latency_omnet_mean": {
       