In [None]:
from constants import servers
import analysis_functions

import pandas as pd
import maxminddb

import logging


# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

def ip_to_asn(ip_address, db_path='data/external/GeoLite2-ASN.mmdb'):
    """Look up the autonomous-system organisation name for an IP address.

    Args:
        ip_address: The address to look up, in whatever form the MaxMind
            reader's ``get`` accepts.
        db_path: Path of the MaxMind ASN database file to query.

    Returns:
        The value of the record's ``autonomous_system_organization`` field, or
        ``None`` when the address has no record, the record lacks that field,
        or the lookup itself fails (e.g. unreadable database, malformed
        address).
    """
    try:
        with maxminddb.open_database(db_path) as reader:
            record = reader.get(ip_address)
    except Exception as exc:
        # Stay best-effort (callers expect None rather than an exception), but
        # report the failure: a missing or broken database would otherwise
        # silently turn every ASN into None.
        logging.getLogger(__name__).warning(
            "ASN lookup failed for %r using %s: %s", ip_address, db_path, exc
        )
        return None

    # An address that is not covered by the database yields no record at all.
    if not isinstance(record, dict):
        return None
    return record.get('autonomous_system_organization')
    
# Registry of detected anomalies, filled in by the cells below:
# anomaly name -> {'ips': [...], 'asns': [...]}.
anomaly_dict = {}

# Capture identifier handed to most analysis_functions calls in this notebook.
# NOTE(review): the earlier note read "non identifier (previously only used for abn
# list or not)"; "abn" is presumably a typo for "ban" -- confirm what "non" selects.
capture_id = "non"

In [None]:

# Basic summary stats plus the two syntactic violations returned alongside them:
# support-flags omission and last-seen transmission.
support_flags_sources, lastseen_sources, basic_stats = analysis_functions.stats_syntactic(capture_id)

# Store each violation's source IPs together with the distinct ASN organisations.
for anomaly_name, source_ips in (
    ('SF Omission', support_flags_sources),
    ('LS Transmission', lastseen_sources),
):
    anomaly_dict[anomaly_name] = {'ips': list(source_ips)}
    anomaly_dict[anomaly_name]['asns'] = list(set(map(ip_to_asn, anomaly_dict[anomaly_name]['ips'])))

In [None]:
# Peer List Diversity
pl_div_ips, max_announced_subnets, median_announced_subnets = analysis_functions.peer_list_diversity(
    capture_id,
    threshold=0.05,
)

# Flagged IPs first, then the distinct ASN organisations resolved from them.
anomaly_dict['PL Diversity'] = {'ips': list(pl_div_ips)}
anomaly_dict['PL Diversity']['asns'] = list(set(map(ip_to_asn, anomaly_dict['PL Diversity']['ips'])))

In [None]:
# Peer List Similarity
pl_sim_ips, overlap_df = analysis_functions.peer_list_similarity(
    capture_id,
    threshold=0.99,
)

# Flagged IPs first, then the distinct ASN organisations resolved from them.
anomaly_dict['PL Similarity'] = {'ips': list(pl_sim_ips)}
anomaly_dict['PL Similarity']['asns'] = list(set(map(ip_to_asn, anomaly_dict['PL Similarity']['ips'])))

In [None]:
# ID:IP Anomalies
(
    sus_id_cluster_ips,
    sus_id_anomaly_ips,
    interection_ips,
    union_ips,
    id_count_dist,
    num_clusters,
) = analysis_functions.node_ids(capture_id)

# union_ips is the selection recorded here; sus_id_anomaly_ips or sus_id_cluster_ips
# can be substituted on the next line to report only one of the two groups.
anomaly_dict['ID:IP Anomaly'] = {'ips': list(union_ips)}
anomaly_dict['ID:IP Anomaly']['asns'] = list(set(map(ip_to_asn, anomaly_dict['ID:IP Anomaly']['ips'])))


In [None]:
# Connection Anomalies
sus_short, sus_ping, sus_ts, all_latencies, conn_df = analysis_functions.connections(
    capture_id,
    threshold=120,
    min_tss=600,
    time_duration=660,
)

# One registry entry per connection anomaly: flagged IPs plus their distinct ASNs.
for anomaly_name, suspicious_ips in (
    ('Short-lived Conn', sus_short),
    ('Ping Flooding', sus_ping),
    ('Throttled TS', sus_ts),
):
    anomaly_dict[anomaly_name] = {'ips': list(suspicious_ips)}
    anomaly_dict[anomaly_name]['asns'] = list(set(map(ip_to_asn, anomaly_dict[anomaly_name]['ips'])))

In [None]:
# Descriptive statistics for the latencies returned by analysis_functions.connections.
values = all_latencies
print(f"Mean: {values.mean()}")
print(f"Median: {values.median()}")
print(f"Std Dev: {values.std()}")
print(f"Min: {values.min()}")
print(f"Max: {values.max()}")
print(f"\nDifference from expected (60): {values.mean() - 60}")

# Quartiles
print(f"\n25th percentile: {values.quantile(0.25)}")
# The value printed is the 0.95 quantile, so the label says 95th (it used to claim 99th).
print(f"95th percentile: {values.quantile(0.95)}")

# Extreme outliers: anything more than 3 * IQR above the third quartile.
Q3 = values.quantile(0.75)
IQR = Q3 - values.quantile(0.25)
threshold = Q3 + 3 * IQR  # 3*IQR for extreme outliers
irregular = values[values > threshold]

print(len(irregular))
# min() raises ValueError on an empty selection, so only report the smallest
# outlier when at least one value exceeds the threshold.
if len(irregular) > 0:
    print(min(irregular))
else:
    print("No latencies above the outlier threshold")

In [None]:
# Signature-only IPs (and ban listed)
banned_ips, signature_only_ips = analysis_functions.ban_and_signature(capture_id)

# Register both groups: flagged IPs plus the distinct ASN organisations behind them.
for anomaly_name, flagged_ips in (
    ('Signature Only', signature_only_ips),
    ('Ban Listed', banned_ips),
):
    anomaly_dict[anomaly_name] = {'ips': list(flagged_ips)}
    anomaly_dict[anomaly_name]['asns'] = list(set(map(ip_to_asn, anomaly_dict[anomaly_name]['ips'])))


In [None]:
# Subnet and ASN Saturation
sus_subnet_ips, lion_peer_ips, median_subnet_peers = analysis_functions.subnets_asn_comb(capture_id)

# Only the count of lion_peer_ips is needed for the report later on.
lion_peers = len(lion_peer_ips)

anomaly_dict['High Subnet Sat'] = {'ips': list(sus_subnet_ips)}
anomaly_dict['High Subnet Sat']['asns'] = list(set(map(ip_to_asn, anomaly_dict['High Subnet Sat']['ips'])))


In [None]:
# In-Degree analysis: fetch the raw result first, then wrap it in a DataFrame.
indegree_records = analysis_functions.indegree(capture_id)
indegrees = pd.DataFrame(indegree_records)

In [None]:
# Final Eval
# Hand the per-anomaly IP lists collected in anomaly_dict to analysis_functions; the
# result is reused by later cells as the set of identified peers.
# NOTE(review): judging by the helper's name it also plots the overlap between
# anomaly types -- confirm in analysis_functions.
identified_ns_peers = analysis_functions.summarize_IPs_plot_overlap(capture_id, anomaly_dict)

In [None]:

# NOTE(review): presumably persists the identified peers and returns the full saved
# collection -- confirm in analysis_functions.save_ns_peers. The result is later
# compared against the ban list and its length is passed to the TeX report.
all_ns_peers = analysis_functions.save_ns_peers(identified_ns_peers)

In [None]:
# Adversarial peers: every signature-only IP combined with the peers in lion_peer_ips.
adversarial_peers = set(anomaly_dict['Signature Only']['ips']).union(lion_peer_ips)

saturation_stats = analysis_functions.plot_anom_saturation(
    capture_id,
    total_anomaly_set=identified_ns_peers,
    adversarial_peers=adversarial_peers,
)
reachable_peers = analysis_functions.reachable(capture_id, identified_ns_peers)

In [None]:
# Statistics from analysis_functions.compare_to_banlist for the saved peer collection;
# they are forwarded to format_and_write_tex further down.
# NOTE(review): the helper name suggests a comparison against a ban list -- confirm.
ban_list_stats = analysis_functions.compare_to_banlist(all_ns_peers)

In [None]:
pl_poison = analysis_functions.pl_poison_dist(capture_id, identified_ns_peers)
#network_distribution = analysis_functions.network_distribution(capture_id, identified_ns_peers)

# Positional arguments for the TeX report, in the order format_and_write_tex receives them.
report_args = (
    capture_id,
    basic_stats,
    anomaly_dict,
    conn_df,
    all_latencies,
    indegrees,
    sus_id_anomaly_ips,
    sus_id_cluster_ips,
    num_clusters,
    median_subnet_peers,
    identified_ns_peers,
    reachable_peers,
    lion_peers,
    saturation_stats,
    ban_list_stats,
    pl_poison,
    len(all_ns_peers),
)
analysis_functions.format_and_write_tex(*report_args)

In [None]:
def get_ip_overlap(anomaly_dict, key1, key2):
    """Return the IPs that are listed under both anomaly types.

    Args:
        anomaly_dict: Mapping of anomaly name -> dict holding an ``'ips'`` iterable.
        key1: Name of the first anomaly type.
        key2: Name of the second anomaly type.

    Returns:
        A list (in arbitrary order) of the IPs present under both keys. An
        empty list is returned, after printing which key(s) are unknown, when
        either key is absent from ``anomaly_dict``.
    """
    # dict.fromkeys de-duplicates while keeping order, so key1 == key2 is reported once.
    missing = [key for key in dict.fromkeys((key1, key2)) if key not in anomaly_dict]
    if missing:
        # Name the offending key(s) so that a typo is easy to spot.
        print(f"Error: unknown anomaly key(s): {', '.join(map(repr, missing))}")
        return []

    ips1 = set(anomaly_dict[key1]['ips'])
    ips2 = set(anomaly_dict[key2]['ips'])

    overlap = list(ips1 & ips2)

    print(f"Overlap between '{key1}' and '{key2}':")
    print(f"  - {len(overlap)} common IPs out of {len(ips1)} and {len(ips2)}")

    return overlap

def get_ip_overlap_set(anomaly_dict, key1, ip_set):
    """Return the IPs of one anomaly type that also occur in a given collection.

    Args:
        anomaly_dict: Mapping of anomaly name -> dict holding an ``'ips'`` iterable.
        key1: Name of the anomaly type to compare.
        ip_set: Iterable of IPs to intersect with that anomaly's IPs.

    Returns:
        A list (in arbitrary order) of the IPs present in both. An empty list
        is returned, after printing the unknown key, when ``key1`` is absent
        from ``anomaly_dict``.
    """
    if key1 not in anomaly_dict:
        # Only one key is involved here, so name it instead of "one of the keys".
        print(f"Error: unknown anomaly key: {key1!r}")
        return []

    ips1 = set(anomaly_dict[key1]['ips'])
    ips2 = set(ip_set)

    overlap = list(ips1 & ips2)

    print(f"Overlap between '{key1}' and given set:")
    print(f"  - {len(overlap)} common IPs out of {len(ips1)} and {len(ips2)}")

    return overlap

# Show which anomaly names have been registered in anomaly_dict so far.
print(anomaly_dict.keys())

In [None]:
# Anomaly names in the order they were registered in anomaly_dict.
keys = [
    'SF Omission',
    'LS Transmission',
    'PL Diversity',
    'PL Similarity',
    'ID:IP Anomaly',
    'Short-lived Conn',
    'Ping Flooding',
    'Throttled TS',
    'Signature Only',
    'Ban Listed',
    'High Subnet Sat',
]

# keys[-3] is 'Signature Only' and keys[4] is 'ID:IP Anomaly'.
overlap = get_ip_overlap(
    anomaly_dict,
    keys[-3],
    keys[4],
)
# Only a sample of the shared IPs is shown.
print(overlap[:10])

In [None]:
# Signature-only IPs whose ASN organisation is anything other than LIONLINK-NETWORKS
# (the filter is applied on the ASN organisation name returned by ip_to_asn).
outside_lionlink = [
    ip
    for ip in anomaly_dict['Signature Only']['ips']
    if ip_to_asn(ip) != 'LIONLINK-NETWORKS'
]
counter = len(outside_lionlink)
sig_not_sub = set(outside_lionlink)

print(len(sig_not_sub))

# what other anomalies do these IPs show: one overlap report per anomaly type
for key in keys:
    overlap = get_ip_overlap_set(anomaly_dict, key, sig_not_sub)