In [1]:
import ipaddress
import json
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from datetime import datetime, timedelta
import numpy as np
from pathlib import Path
from collections import Counter
import ast
import matplotlib.colors as mcolors
import maxminddb
from itertools import combinations

def ip_to_subnet(ip):
    """Return the /24 IPv4 network that contains ``ip``, in CIDR notation.

    Args:
        ip: An IPv4 address; it is interpolated into ``"{ip}/24"`` before
            being parsed, so anything whose string form is a dotted quad works.

    Returns:
        A string such as ``"192.168.1.0/24"``, or ``None`` when the value
        cannot be parsed as an IPv4 address (e.g. ``None``, an IPv6 address,
        or arbitrary text).
    """
    try:
        # strict=False lets a host address stand in for its enclosing network.
        network = ipaddress.IPv4Network(f"{ip}/24", strict=False)
    except ValueError:
        # AddressValueError and NetmaskValueError are both ValueError
        # subclasses; catching only these keeps KeyboardInterrupt/SystemExit
        # from being silently swallowed as a bare ``except`` would.
        return None
    return f"{network.network_address}/24"

def ip_to_asn(ip_address, db_path='data/external/GeoLite2-ASN.mmdb'):
    """Look up the autonomous-system organization recorded for an IP address.

    Args:
        ip_address: Address passed to the database reader's ``get``.
        db_path: Path of the MaxMind database file to open.

    Returns:
        The ``'autonomous_system_organization'`` entry of the lookup result,
        or ``None`` if anything raises along the way (best-effort lookup:
        every ``Exception`` is swallowed).
    """
    # NOTE(review): the database is opened and closed on every call; callers
    # that map this over many addresses may want to share one reader.
    try:
        with maxminddb.open_database(db_path) as mmdb:
            record = mmdb.get(ip_address)
        return record['autonomous_system_organization']
    except Exception:
        return None
   
def set_plt_latex_format():
    """Apply the notebook's LaTeX-style settings to ``plt.rcParams``.

    Updates the global matplotlib rcParams in place (font sizes, ``text.usetex``
    and a serif font family) and returns nothing.
    """
    font_sizes = {
        "font.size": 12,          # base font size
        "axes.titlesize": 12,     # axes title
        "axes.labelsize": 10,     # axis labels
        "xtick.labelsize": 10,    # x tick labels
        "ytick.labelsize": 10,    # y tick labels
        "legend.fontsize": 12,    # legend entries
        "figure.titlesize": 12,   # figure title
    }
    latex_text = {
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": ["Computer Modern Roman"],
    }
    plt.rcParams.update({**font_sizes, **latex_text})

def retrieve_color_palette(n_colors=10, blends=None):
    """Generate colors by linear interpolation between consecutive anchor colors.

    Args:
        n_colors: Total number of colors to produce.
        blends: Sequence of hex color strings used as gradient anchors. When it
            is ``None`` or has fewer than two entries, a red-to-purple pair is
            used instead.

    Returns:
        ``(colors, custom_cmap)``: the list of RGB tuples and a
        ``mcolors.ListedColormap`` built from that list.
    """
    if blends is None or len(blends) < 2:
        blends = ['#c40d1e', '#9013fe']  # fallback gradient: red -> purple

    anchors = [np.array(mcolors.hex2color(hex_code)) for hex_code in blends]

    # One gradient segment per adjacent anchor pair; the first `leftover`
    # segments each receive one extra color so the total adds up to n_colors.
    n_segments = len(blends) - 1
    base_count, leftover = divmod(n_colors, n_segments)

    colors = []
    for seg_idx, (start_color, end_color) in enumerate(zip(anchors, anchors[1:])):
        count = base_count + 1 if seg_idx < leftover else base_count
        for step in range(count):
            # A one-color segment sits on its start anchor (and avoids 0/0).
            # Multi-color segments include both endpoints, so the anchor shared
            # by two neighbouring segments appears once in each of them.
            t = step / (count - 1) if count > 1 else 0
            colors.append(tuple(start_color + (end_color - start_color) * t))

    custom_cmap = mcolors.ListedColormap(colors)
    return colors, custom_cmap

In [2]:
def load_json(folder_path):
    """Load every ``*.json`` capture in ``folder_path`` into two DataFrames.

    Each file must hold an object with a ``'packets'`` list. Every packet dict
    must carry the keys ``'node_data'``, ``'payload_data'`` and
    ``'local_peerlist_new'`` (each may be ``None``); packets with a peer list
    must also carry ``'source_ip'`` and ``'timestamp'`` (strings).

    Args:
        folder_path: Directory to scan (``str`` or ``Path``). Only files
            directly inside it that match ``*.json`` are read.

    Returns:
        ``(packets_df, peers_df)``:

        * ``packets_df`` - one row per packet: its top-level fields (minus the
          three nested ones), then the keys of ``node_data`` and
          ``payload_data`` flattened in (later sources win on a name clash),
          plus ``peerlist_length`` for packets that carry a peer list.
        * ``peers_df`` - one row per peer-list entry: a copy of the entry plus
          the owning packet's ``source_ip``, ``timestamp`` and a
          ``pl_identifier`` of the form ``"<timestamp>_<source_ip>"``.

    Raises:
        KeyError: If a file or packet lacks one of the required keys.
    """
    nested_keys = ('local_peerlist_new', 'node_data', 'payload_data')
    all_packets = []
    all_peers = []

    # Honour the caller's folder (previously shadowed by a hard-coded list);
    # sorted() makes the row order reproducible across runs and file systems.
    for json_file in sorted(Path(folder_path).glob("*.json")):
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for packet in data['packets']:
            packet_meta = {k: v for k, v in packet.items() if k not in nested_keys}

            # Flatten the nested dicts into the packet row.
            if packet['node_data'] is not None:
                packet_meta.update(packet['node_data'])
            if packet['payload_data'] is not None:
                packet_meta.update(packet['payload_data'])

            peerlist = packet['local_peerlist_new']
            if peerlist is not None:
                # Record on packet_meta (not on the discarded raw packet) so
                # the length actually ends up in the returned DataFrame.
                packet_meta['peerlist_length'] = len(peerlist)
                pl_identifier = packet['timestamp'] + '_' + packet['source_ip']
                for peer in peerlist:
                    peer_data = peer.copy()
                    peer_data['source_ip'] = packet['source_ip']
                    peer_data['timestamp'] = packet['timestamp']
                    peer_data['pl_identifier'] = pl_identifier
                    all_peers.append(peer_data)

            all_packets.append(packet_meta)

    return pd.DataFrame(all_packets), pd.DataFrame(all_peers)

folder_path = Path("data/packets/ams")
# Path.name is the last path component on every OS; splitting str(path) on '/'
# only works where '/' is the separator.
node = folder_path.name

peer_packets_df, peers_df = load_json(folder_path=folder_path)

# some data cleaning: keep only packets whose monero_flags is one of the
# recognised values, then parse the timestamp strings.
possible_flags = ['1', '2', '1,2', '2,1', '1,1']
# .assign() builds a new frame, so the timestamp conversion is not written onto
# a filtered slice (which raises SettingWithCopyWarning).
peer_packets_df = (
    peer_packets_df[peer_packets_df['monero_flags'].isin(possible_flags)]
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)


In [4]:
# Number of packets for each (command, monero_flags) combination.
print(
    peer_packets_df
    .groupby('command')['monero_flags']
    .value_counts()
)

command    monero_flags
1001       2                 62867
           1                 33534
1001,1002  2,1                  43
1001,1003  1,1                 214
1001,2003  1,1                   1
1001,2006  2,1                  23
1001,2008  2,1                   8
1002       2                532299
           1                488625
1002,1002  1,2                  25
           2,1                  23
1002,2002  2,1                  23
           1,1                  12
1002,2003  1,1                   1
1002,2006  2,1                  24
1002,2008  2,1                   7
           1,1                   4
1003       1                 22909
           2                 22255
1003,1002  2,1                   1
1007       1                 20710
           2                 20452
2001       1                   346
2001,1002  1,1                   1
2001,2002  1,1                   1
2002       1               5444205
2002,1002  1,1                 864
           1,2                 

In [5]:
def _split_combined_row(row):
    """Yield one copy of ``row`` per (command, flag) pair of its comma-joined fields."""
    commands = str(row['command']).split(',')
    flags = str(row['monero_flags']).split(',')
    # NOTE(review): zip() pairs positionally and stops at the shorter list, so
    # a row whose two fields hold different numbers of entries loses the
    # surplus ones silently - confirm that is acceptable for this data.
    for cmd, flag in zip(commands, flags):
        single = row.copy()
        single['command'] = cmd.strip()
        single['monero_flags'] = flag.strip()
        yield single


# Rows where either field holds a comma-separated list need splitting.
has_comma_cmd = peer_packets_df['command'].str.contains(',', na=False)
has_comma_flags = peer_packets_df['monero_flags'].str.contains(',', na=False)
has_comma = has_comma_cmd | has_comma_flags

keep_rows = peer_packets_df[~has_comma].copy()
explode_rows = peer_packets_df[has_comma].copy()

# One output row per (command, flag) pair of every combined row.
new_rows = [
    single_row
    for _, combined_row in explode_rows.iterrows()
    for single_row in _split_combined_row(combined_row)
]
exploded_df = pd.DataFrame(new_rows)

# Re-attach the untouched rows and restore chronological order.
peer_packets_df = (
    pd.concat([keep_rows, exploded_df], ignore_index=True)
    .sort_values(['timestamp'])
)

In [6]:
# Number of packets for each (command, monero_flags) combination.
print(
    peer_packets_df
    .groupby('command')['monero_flags']
    .value_counts()
)

command  monero_flags
1001     2                 62941
         1                 33749
1002     2                533067
         1                489639
1003     1                 23123
         2                 22287
1007     1                 20710
         2                 20452
2001     1                   348
2002     1               5446289
2003     1                   544
2004     1                   948
2006     1                  1057
2007     1                  1004
2008     1                264766
2009     1                   420
2010     1                    11
Name: count, dtype: int64
