In [172]:
import pandas as pd
import networkx as nx
import json

# Load the cable table from CSV. The graph-building cell below reads its
# Start_Lon/Start_Lat, End_Lon/End_Lat, "Name" and "Capacity(in Gbps)" columns.
df = pd.read_csv("withCapacity.csv")

In [173]:

# Build an undirected graph from the cable table (cables connect both ways,
# so edge direction carries no meaning).
# NOTE(review): nx.Graph keeps a single edge per node pair, so a later row with
# the same two endpoints presumably replaces the earlier row's name/capacity —
# confirm that this is intended.
G = nx.Graph()

for _, cable in df.iterrows():
    # Nodes are keyed by their (lon, lat) tuple.
    endpoints = (
        (cable["Start_Lon"], cable["Start_Lat"]),
        (cable["End_Lon"], cable["End_Lat"]),
    )

    # Register each endpoint once, storing its coordinates as node attributes.
    for point in endpoints:
        if not G.has_node(point):
            point_lon, point_lat = point
            G.add_node(point, lon=point_lon, lat=point_lat)

    # Connect the two endpoints; the edge carries the cable name and capacity
    # (capacity is None when the "Capacity(in Gbps)" column is absent).
    cable_attrs = {
        "name": cable["Name"],
        "capacity": cable.get("Capacity(in Gbps)", None),
    }
    G.add_edge(*endpoints, **cable_attrs)
    
# Load the GeoJSON file with the landing stations.
with open("landpointsGeojson/Landing_Points.geojson", "r", encoding="utf-8") as f:
    geojson = json.load(f)

# Build lookup: (lon, lat) -> city description.
# Coordinates are rounded to 6 decimals, matching the rounding applied to the
# node coordinates when names are assigned to the graph nodes.
cityNames = []
exact_lookup = {}      # keys exactly as they appear in the GeoJSON
mirrored_lookup = {}   # the same points with flipped latitude/longitude signs
for feature in geojson["features"]:
    coords = feature["geometry"]["coordinates"]
    lon, lat = round(coords[0], 6), round(coords[1], 6)
    city_name = feature["properties"].get("description", "unknown")
    cityNames.append(city_name)
    exact_lookup[(lon, lat)] = city_name
    # Also register the city under its sign-flipped coordinates.
    for mirrored_key in ((lon, -lat), (-lon, lat), (-lon, -lat)):
        mirrored_lookup[mirrored_key] = city_name

# Merge with the exact keys last: a real landing-point coordinate must never be
# overwritten by the mirrored variant of a different city (previously this
# depended on the order of the features in the file).
city_lookup = {**mirrored_lookup, **exact_lookup}

print(len(cityNames))
print(len(set(cityNames)))

# Label every node: use the landing-point description when the node's rounded
# coordinates are a key of city_lookup, otherwise mark the node as "undersea".
for node, attrs in G.nodes(data=True):
    rounded_key = (round(attrs["lon"], 6), round(attrs["lat"], 6))
    G.nodes[node]["name"] = city_lookup.get(rounded_key, "undersea")

# from math import radians, sin, cos, sqrt, atan2

# # # Haversine distance (km) between two lon/lat points
# def haversine(lon1, lat1, lon2, lat2):
#     R = 6371.0  # Earth radius in km
#     dlon = radians(lon2 - lon1)
#     dlat = radians(lat2 - lat1)
#     a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
#     c = 2 * atan2(sqrt(a), sqrt(1-a))
#     return R * c

# # # Assign city names by nearest match (within threshold)
# max_distance_km = 1

# for node, data in G.nodes(data=True):
#     lon, lat = data["lon"], data["lat"]
#     assigned = False
#     for city_lon, city_lat in city_lookup:
#         dist = haversine(lon, lat, city_lon, city_lat)
#         if dist <= max_distance_km:  # within threshold
#             G.nodes[node]["name"] = city_lookup[(city_lon, city_lat)]
#             assigned = True
#             break
#     if not assigned:
#         G.nodes[node]["name"] = "undersea"

# Example: show number of nodes and edges
print("Nodes:", len(G.nodes))
print("Edges:", len(G.edges))

# Collect the name assigned to every node to see how well the matching worked.
cityNamesGraph = [attrs["name"] for _, attrs in G.nodes(data=True)]
print(f"Length of city names in graph: {len(cityNamesGraph)}")
# Exclude the "undersea" placeholder explicitly instead of subtracting 1, so the
# count stays correct even when no node is labelled "undersea".
print(f"Number of unique city names in graph:{len(set(cityNamesGraph) - {'undersea'})}")
# Single-quoted f-string: reusing double quotes inside the replacement field is
# a SyntaxError before Python 3.12. The printed text is unchanged.
print(f'Occurences of "undersea": {cityNamesGraph.count("undersea")}')

# Landing-point descriptions that were not assigned to any node.
difference = set(cityNames).difference(cityNamesGraph)
print(difference)
print(len(difference))

# Save graph to file (GraphML for readability, or GEXF for Gephi)
# NOTE(review): node keys are (lon, lat) tuples; presumably they end up as their
# string form in the GraphML file — verify before reloading the graph.
nx.write_graphml(G, "subsea_cables.graphml")

703
677
Nodes: 1074
Edges: 1119
Length of city names in graph: 1074
Number of unique city names in graph:546
Occurences of "undersea": 508
{'Oxford Falls, Sydney (Australia)', ' ()', 'Rankin Inlet (Canada)', 'Quagtaq (Canada)', 'Anaduir (Russia)', 'San Jose (USA)', 'Sochi (Russia)', 'Uturoa (French Polynesia)', 'Island Park (USA)', 'Helsing�r (Denmark)', 'Algiers (Algeria)', 'Puerto Cabezas (Nicaragua)', 'Auckland (New Zealand)', 'Ketchikan (USA)', 'Oahu (Hawaii)', 'Ninomiya (Japan)', 'Paddington, Sydney (Australia)', 'Boma (Democratic Republic of Congo)', 'Punta Gorda (Venezuela)', 'Puerto Lempira (Honduras)', 'La Union (Philippines)', 'Bunkeflostrand (Sweden)', 'Port Hedland (Australia)', 'Ustupo (Panama)', 'Koropi (Greece)', 'Basseterre (Saint Kitts and Nevis)', 'Sainte Marie (Reunion)', 'Cambridge Bay CLS (Canada)', 'Paljassaare (Estonia)', 'Okha (Sakhalin) ()', 'Ladyville (Belize)', 'Tahkuna (Estonia)', 'Santana (S�o Tom� and Pr�ncipe)', 'Caracas (Venezuela)', 'Taloyoak (Canada)',

In [174]:
# First, count how many cable endpoints coincide exactly (no rounding) with a
# landing point or with its latitude-flipped counterpart.
cableCoordinates = set(zip(df["Start_Lon"], df["Start_Lat"]))
cableCoordinates.update(zip(df["End_Lon"], df["End_Lat"]))

landingCoordinates = set()
landingCoordinatesFlipLat = set()
for feature in geojson["features"]:
    # Use only the first two components so that a position carrying an
    # optional third (altitude) element does not break the unpacking.
    city_lon, city_lat = feature["geometry"]["coordinates"][:2]
    landingCoordinates.add((city_lon, city_lat))
    landingCoordinatesFlipLat.add((city_lon, -city_lat))

print(len(cableCoordinates.intersection(landingCoordinates.union(landingCoordinatesFlipLat))))
#seems like 155 cable coordinates match city coordinates exactly

155


In [175]:
# Load the landing-point table and preview its first rows.
landpointsDf = pd.read_csv("landpoints.csv")
landpointsDf.head()

Unnamed: 0,index,longitude,latitude,name,description
0,0,88.494067,30.33285,Pascagoula,Pascagoula (USA)
1,1,95.342067,28.913233,Freeport,Freeport (USA)
2,2,32.594934,25.974618,Maputo,Maputo (Mozambique)
3,3,31.768317,28.962467,Mtunzini,Mtunzini (South Africa)
4,4,43.698021,23.409263,Toliara,Toliara (Madagascar)


In [176]:
# List the landing-point names that occur more than once. Each repeated name is
# listed once per extra occurrence (a name present 4 times is listed 3 times).
name_column = landpointsDf["name"]
uniqueNames = list(name_column.unique())
# duplicated(keep="first") flags every occurrence after the first, which is
# exactly what remains after removing one occurrence of each unique name, but
# without the quadratic list.remove() loop. Missing names are dropped so that
# the sort never has to compare a string with NaN.
names = sorted(name_column[name_column.duplicated(keep="first")].dropna())
print(names)
print(len(names))

['Angoon', 'Ayre of Cara', 'Baie du Jacotet', 'Bruges', 'Dublin', 'Dublin', 'Dublin', 'Freeport', 'Gedebak Odde', 'Hawaiki Landing', 'Kilmore Quay', 'Kochi', 'Lagos', 'Manasquan', 'Marmaris', 'Miami', 'Pacific City', 'Saint-Paul', 'Sandy Point', 'Shantou', 'Southport', 'Southport', 'Tripoli', 'Tuas', 'Tuckerton', 'Varna', 'Zandvoort', 'Zandvoort']
28


In [177]:
#same for description:
description_column = landpointsDf["description"]
uniqueDescr = list(description_column.unique())
# Keep every occurrence after the first of each description, in table order.
# This equals removing one occurrence of each unique description, without the
# quadratic list.remove() loop. Missing descriptions are dropped so that the
# split below only ever sees strings.
descr = description_column[description_column.duplicated(keep="first")].dropna().tolist()

# Keep only the part before " (" so the result is comparable with the name list.
descriptionCities = sorted(x.split(" (")[0] for x in descr)
print(descriptionCities)
print(len(descriptionCities))

# For instance, "Freeport" only appears in the duplicated-name list because there is
# a Freeport in the USA and a Freeport in the Bahamas.
#   -> we should keep using the description instead of the name

['Angoon', 'Ayre of Cara', 'Baie du Jacotet', 'Bruges', 'Dublin', 'Dublin', 'Dublin', 'Gedebak Odde', 'Kilmore Quay', 'Kochi', 'Lagos', 'Manasquan', 'Marmaris', 'Miami', 'Pacific City', 'Saint-Paul', 'Shantou', 'Southport', 'Southport', 'Tuas', 'Tuckerton', 'Varna', 'Zandvoort', 'Zandvoort']
24


In [178]:
### Realizations & ToDos:
#   Some cities appear multiple times (e.g. 4x Dublin in the landing points)
#   with slightly different coordinates. We could just merge nodes based on their name attribute at the
#   end (except for the undersea nodes).

#   why are some cities not matched to any cable?
#   -> let's add them anyways and visualize the thing to see!

In [None]:
#Okay: my assumption that the first and the last coordinate of a cable are its landing points was false.
#Sometimes an intermediate coordinate of a cable also touches a city/landing point, so those coordinates matter too.

#So, how we actually want to construct this network:
#   get all landpoint coordinates with city names/descriptions
#   get all cable coordinates (so for each cable, all of its coordinates)
#   -> save all of them such that there is a row for each coordinate and another column contains
#   the name of the cable that connects to this point
#   make a new list of all relevant coordinates: those that appear (a) in cables and in landing points or (b) in multiple cables
#   -> these conditions make a node
#   now iterate over all cables from the geojson files and for each cable do:
#   -   filter only the relevant coordinates
#   -   now add a connection in the graph for every pair of remaining coordinates for this cable
#       (weight equal to the capacity of the cable + add cable name as an attribute)
#   
#   -   iterate over all the nodes in the constructed network and assign city names (descriptions) to those
#       that appear in the city list (undersea otherwise)

#Afterwards: can start merging more stuff(like nodes inside a county, etc...)

