In [29]:
import pandas
import networkx
import os
import json
import pandas as pd

In [30]:
# Load the landing points and index their descriptions by integer coordinate pair.
landPointDf = pd.read_csv("landpoints.csv")

cityLookup = {}
for point in landPointDf.itertuples():
    lonInt = int(point.longitude)
    latInt = int(point.latitude)
    # Register the description under all four sign combinations of the pair.
    # NOTE(review): presumably the sign of the stored coordinates is unreliable — confirm against landpoints.csv.
    for latSign in (1, -1):
        for lonSign in (1, -1):
            cityLookup[(lonSign * lonInt, latSign * latInt)] = point.description

# Every coordinate pair (all sign variants) that belongs to a landing point.
landPointCoords = set(cityLookup)

In [31]:
# Now extract a set containing every coordinate that appears more than once across
# the cable files (these shared coordinates are the "important" ones).
# Directory with .geojson files
input_dir = "cablesGeojson"
# NOTE(review): output_file is not referenced anywhere else in the visible notebook — confirm it is still needed.
output_file = "all_cables.csv"

def normCoord(long, lat):
    """Scale a (longitude, latitude) pair by 1e6 and round each value to an int.

    Integer coordinates can be compared and hashed exactly, which the
    floating point originals cannot.

    Returns:
        tuple[int, int]: (scaled longitude, scaled latitude).
    """
    scaledLong = round(long * 1e6)
    scaledLat = round(lat * 1e6)
    return (int(scaledLong), int(scaledLat))

def coordBack(normedCoord):
    """Undo normCoord: turn a 1e6-scaled integer pair back into floats.

    Args:
        normedCoord: indexable pair (scaled longitude, scaled latitude).

    Returns:
        tuple[float, float]: (longitude, latitude) multiplied by 1e-6.
    """
    lonScaled = normedCoord[0]
    latScaled = normedCoord[1]
    lonValue = float(lonScaled) * 1e-6
    latValue = float(latScaled * 1e-6)
    return (lonValue, latValue)

# First pass over every cable file: collect the normalized coordinates of all
# cable segments so that coordinates used more than once can be detected below.

cableCoords = []
for filename in os.listdir(input_dir):
    if not filename.endswith(".geojson"):
        continue  # only .geojson files describe cables
    filepath = os.path.join(input_dir, filename)

    # Load geojson file
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Extract the coordinates of every cable segment in this file
    for feature in data["features"]:
        coords = feature["geometry"].get("coordinates", [])
        if not coords:
            # Cables should always have coordinates; name the offending file so it can be found.
            raise ValueError(f"cable feature without coordinates in {filepath}")
        # Each entry is indexed as c[0] (longitude) and c[1] (latitude).
        coords = [normCoord(c[0], c[1]) for c in coords]
        cableCoords.extend(coords)

coordSet = set()  # coordinates seen at least once
duplicateCoordSet = set()  # coordinates seen more than once (plus everything on the 180th meridian)
for c in cableCoords:
    if abs(c[0]) == 180000000:
        # Coordinates on the 180th longitude are always marked as important,
        # otherwise connections across this border won't appear.
        duplicateCoordSet.add(c)
    elif c not in coordSet:
        coordSet.add(c)
    else:
        duplicateCoordSet.add(c)  # we found a duplicate

In [32]:

import networkx as nx
import pandas as pd

relevantCoords = duplicateCoordSet.union(landPointCoords)  # these will be all the important coordinates!
# Second pass over the cables: for each cable segment, connect consecutive
# relevant coordinates with an edge carrying the cable's name and capacity.
#
# NOTE: nx.Graph stores a single edge per node pair, so when two cables join the
# same pair of nodes the later add_edge call overwrites the earlier name/capacity.

G = nx.Graph()
cableCapacities = pd.read_csv("withCapacity.csv")

# Resolve name -> capacity once instead of filtering the whole DataFrame for every edge.
# setdefault keeps the first row per name, matching the previous `.values[0]` lookup.
capacityByName = {}
for capName, capValue in zip(cableCapacities["name"].values, cableCapacities["capacity"].values):
    capacityByName.setdefault(capName, capValue)


def cableCapacity(cableName):
    """Return the capacity listed for `cableName` in withCapacity.csv.

    Raises:
        KeyError: if the cable has no row in withCapacity.csv (previously this
            surfaced as an uninformative IndexError from `.values[0]`).
    """
    try:
        return capacityByName[cableName]
    except KeyError:
        raise KeyError(f"cable {cableName!r} has no capacity entry in withCapacity.csv") from None


oneCount = 0  # how often was a cable not added because only 1 of its coordinates was important?
zeroCount = 0  # how often did a cable have no relevant coordinate at all?

# Loop through all files in the directory
for filename in os.listdir(input_dir):
    if not filename.endswith(".geojson"):
        continue
    filepath = os.path.join(input_dir, filename)

    # Load geojson file
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Extract cable info
    for feature in data["features"]:
        name = feature["properties"].get("Name", "Unknown")
        coords = feature["geometry"].get("coordinates", [])
        if not coords:
            # shouldn't happen; report which cable/file is broken
            raise ValueError(f"cable {name!r} in {filepath} has no coordinates")
        coords = [normCoord(c[0], c[1]) for c in coords]  # normalized coordinates of this cable segment
        # Keep only the relevant coordinates; fold longitude -180 onto +180 so both
        # sides of the 180th meridian end up as the same node.
        relevantCs = [
            (abs(c[0]), c[1]) if abs(c[0]) == 180000000 else c
            for c in coords
            if c in relevantCoords
        ]
        if len(relevantCs) == 0:
            # add the edge anyways, both endpoints will be marked as "undersea"
            G.add_edge(coords[0], coords[-1], name=name, capacity=cableCapacity(name))
            zeroCount += 1
        elif len(relevantCs) == 1:
            # WARNING: this description will be overwritten later when nodes are renamed
            G.add_node(relevantCs[0], description="singleton")
            oneCount += 1
        # add an edge between adjacent important points (adjacent on the cable)
        for i in range(len(relevantCs) - 1):
            G.add_edge(relevantCs[i], relevantCs[i + 1], name=name, capacity=cableCapacity(name))


print(oneCount)
print(zeroCount)
print(G)

58
6
Graph with 1283 nodes and 1531 edges


In [33]:
# Swap the coordinate-tuple node keys for consecutive integer ids and attach
# real-valued lon/lat plus a description (city name or "undersea") to each node.

mapping = {}    # old coordinate node -> new integer id
new_attrs = {}  # new integer id -> node attribute dict
for nodeId, coordNode in enumerate(G.nodes()):
    lon, lat = coordNode
    degrees = coordBack((lon, lat))  # back from scaled ints to floats
    mapping[coordNode] = nodeId
    new_attrs[nodeId] = {
        "lon": degrees[0],
        "lat": degrees[1],
        # nodes that match no landing point are labelled "undersea"
        "description": cityLookup.get((lon, lat), "undersea"),
    }

# Relabel first, then attach the attributes under the new ids
G = nx.relabel_nodes(G, mapping)
nx.set_node_attributes(G, new_attrs)

# Report how many distinct descriptions ended up in the graph ...
citiesMatched = [attrs["description"] for nodeId, attrs in G.nodes(data=True)]
print(len(set(citiesMatched)))

# ... and which known cities did not end up on any node
cities = set(cityLookup.values())
print(cities.difference(citiesMatched))

652
{'Hawaii (USA)', 'Broadstairs (United Kingdom)', 'Ras Sudar (Egypt)', 'Massawa (Eritrea)', 'Sangano Village (Cable Station) (Angola)', 'Berbera (Somalialand)', 'South La (Ghana)', 'Vale (Guernsey)', 'Reykjav�k (Iceland)', 'Izmir (Turkey)', 'Gambell BMH (USA)', 'Puerto Viejo (Venezuela)', 'Wrangell (USA)', 'Cairo (Egypt)', 'Skewjack (UK)', 'Punta Gorda (Venezuela)', 'Castricum (Netherlands)', 'Port Hedland (Australia)', "Cape D'Aguilar (Hong Kong)", 'Veules-les-Roses (France)', 'Portimao (Portugal)', 'Los Angeles (USA)', 'Grover Beach (USA)', 'Boma (Democratic Republic of Congo)', 'Seaford (United Kingdom)'}


In [34]:
# Peek at the first five edges together with their attribute dicts.
firstEdges = list(G.edges(data=True))[:5]
for edge in firstEdges:
    print(edge)

(0, 1, {'name': 'AAE-1 (Asia Africa Europe)', 'capacity': np.float64(0.0)})
(0, 2, {'name': 'AAE-1 (Asia Africa Europe)', 'capacity': np.float64(0.0)})
(0, 3, {'name': 'AAE-1 (Asia Africa Europe)', 'capacity': np.float64(0.0)})
(1, 465, {'name': 'SEA-ME-WE-5', 'capacity': np.float64(0.0)})
(1, 377, {'name': 'SEA-ME-WE-5', 'capacity': np.float64(0.0)})


In [35]:
# Remove redundant nodes: seafloor ("undersea") nodes that merely sit in the middle
# of a single cable, i.e. have exactly two incident edges that carry the same cable name.
# Iterate over a snapshot because nodes are removed inside the loop.
for n, d in list(G.nodes(data=True)):
    if d["description"] != "undersea" or G.degree[n] != 2:
        continue
    edges = list(G.edges(n, data=True))
    if len(edges) != 2:
        # A self-loop contributes 2 to the degree but is only one edge;
        # there are no two neighbours to join, so leave the node alone.
        continue
    firstData, secondData = edges[0][2], edges[1][2]
    if firstData["name"] != secondData["name"]:
        continue  # the node joins two different cables, keep it
    firstNeighbor, secondNeighbor = edges[0][1], edges[1][1]
    if G.has_edge(firstNeighbor, secondNeighbor):
        # nx.Graph holds one edge per node pair: adding the shortcut would overwrite
        # the existing edge's name/capacity, so keep the node instead.
        continue
    # Bridge the two neighbours with the same cable name & capacity, then drop the node.
    G.add_edge(firstNeighbor, secondNeighbor, name=firstData["name"], capacity=firstData["capacity"])
    G.remove_node(n)

print(G)

Graph with 1031 nodes and 1279 edges


In [36]:
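# # Disabled cell: visualize the graph with folium.
# # NOTE: the node attributes "lon"/"lat" are already stored as real coordinates by the
# # renaming cell (via coordBack), so the coordBack(...) calls below would scale them by
# # 1e-6 a second time; drop those calls before re-enabling this cell.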
# import folium

# def plot_graph_folium(G, output_file="graph_map.html", overlayGeojson = False):
#     # Extract coordinates from node attributes
#     coords = [coordBack((d["lon"], d["lat"])) for _, d in G.nodes(data=True)]
#     #coords = [coordBack(d["coord"]) for _, d in G.nodes(data=True)]
#     lons, lats = zip(*coords)
    
#     # Center map on average position
#     center_lat = sum(lats) / len(lats)
#     center_lon = sum(lons) / len(lons)
#     m = folium.Map(location=[center_lat, center_lon], zoom_start=3, tiles="CartoDB positron")

#     # Add nodes
#     for n, d in G.nodes(data=True):
#         lon, lat = coordBack((d["lon"], d["lat"]))
#         desc = d.get("description", "")
#         location = f"lon: {lon} lat: {lat}"
#         folium.CircleMarker(
#             location=[lat, lon],
#             radius=5,
#             popup=f"Node {n}<br>{desc}<br>{location}",
#             color="blue",
#             fill=True,
#             fill_color="blue",
#             fill_opacity=0.7
#         ).add_to(m)

#     # Add edges
#     for u, v in G.edges():
#         lon1, lat1 = coordBack((G.nodes[u]["lon"], G.nodes[u]["lat"]))
#         lon2, lat2 = coordBack((G.nodes[v]["lon"], G.nodes[v]["lat"]))
#         folium.PolyLine(
#             locations=[(lat1, lon1), (lat2, lon2)],
#             color="red",
#             weight=2,
#             opacity=0.6
#         ).add_to(m)
    
#     #overlay the "real network"
#     if overlayGeojson:
#         # Loop through all files in the directory
#         for filename in os.listdir(input_dir):
#             if filename.endswith(".geojson"):
#                 filepath = os.path.join(input_dir, filename)
#                 folium.GeoJson(filepath).add_to(m)

#     # Save map as HTML
#     m.save(output_file)
#     print(f"Map saved to {output_file}")
    
# plot_graph_folium(G, output_file="graph_map_overlayed_fixed.html", overlayGeojson=True)
# plot_graph_folium(G, output_file="graph_map_fixed.html", overlayGeojson=False)

In [37]:
# Save the final graph to a GraphML file.
# The path is relative, so the file is written into the current working directory.

nx.write_graphml(G, "THE_GRAPH.graphml")

In [None]:

#Persisting issues:

#   disconnected components ->  assume in-country terrestrial cables?

#   some cities are singletons! Now I added them, but they don't have any edges! -> fix it by getting the coordinate at the other end of the cable (even though we know it's not a relevant coordinate) and adding it
#   like for example the cable from lon: 34.934425999999995 lat: 70.038511 that goes into nothingness above russia
#   why is the US west coast not connected to China's east coast?

#   some undersea-nodes are just dead ends (but they shouldn't be!) -> like the one below Cape Coast that should really be connected to Abidjan


#Sitka USA is a singleton! (somehow the cable from Angoon just doesn't get picked up???)