In [None]:
# Edge Betweenness Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' is the supported replacement for the deprecated
# delim_whitespace=True keyword (pandas emits a FutureWarning for the latter).
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Build a directed graph; every edge carries its 'Weight' column as an attribute.
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Edge betweenness over weighted shortest paths ('Weight' is used as the path length).
# NOTE(review): weighted shortest-path search assumes non-negative weights —
# confirm that the 'Weight' column satisfies this before trusting the ranking.
edge_betweenness = nx.edge_betweenness_centrality(G, weight='Weight')

# Keep the ten edges with the largest centrality, highest first.
top_edges = sorted(edge_betweenness.items(), key=lambda item: item[1], reverse=True)[:10]

# node_id -> human-readable name, taken from reachability-meta.csv.
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Print the ranking as a fixed-width table.
print("EDGE BETWEENNESS CENTRALITY")
print(f"{'NODE NAME 1':<20} {'NODE NAME 2':<20} {'Edge Betweenness Centrality':<25}")
print("-" * 65)
for (node1, node2), centrality in top_edges:
    # Fall back to the raw id when a node has no metadata entry.
    node1_name = id_to_name.get(node1, str(node1))
    node2_name = id_to_name.get(node2, str(node2))
    print(f"{node1_name:<20} {node2_name:<20} {centrality:.4f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



EDGE BETWEENNESS CENTRALITY
NODE NAME 1          NODE NAME 2          Edge Betweenness Centrality
-----------------------------------------------------------------
Kona, HI             Greenbrier, WV       0.9650
Buffalo, NY          Aspen, CO            0.9628
Cordova, AK          Miami, FL            0.9597
Columbus, OH         Castlegar, BC        0.9584
Manchester, NH       Hilo, HI             0.9583
Hilo, HI             Columbus, OH         0.9562
Moses Lake, WA       Charlotte, NC        0.9562
Miami, FL            Petersburg, AK       0.9554
Kahului, HI          Victoria, TX         0.9551
Telluride, CO        Buffalo, NY          0.9453


In [None]:
# Edge Betweenness Centrality on Map
import pandas as pd
import networkx as nx
import folium
from folium.plugins import MarkerCluster
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph with 'Weight' as an edge attribute.
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Calculate weighted edge betweenness centrality and keep the ten largest values.
edge_betweenness = nx.edge_betweenness_centrality(G, weight='Weight')
top_edges = sorted(edge_betweenness.items(), key=lambda item: item[1], reverse=True)[:10]

# One row per top edge. Explicit columns keep the frame well-formed even if the
# ranking is empty.
top_edges_df = pd.DataFrame(
    [{'from_node': source, 'to_node': target, 'centrality': score}
     for (source, target), score in top_edges],
    columns=['from_node', 'to_node', 'centrality'],
)
# Two joins against the same metadata columns: pandas suffixes the clashing
# 'name'/'node_id' columns, so 'name_x' is the source city and 'name_y' the target city.
top_edges_df = top_edges_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='from_node', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
top_edges_df = top_edges_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='to_node', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# Ten random hex colours, drawn from a locally seeded generator so that the saved
# map is identical on every run.
color_rng = random.Random(42)
colors = ["#" + ''.join(color_rng.choice('0123456789ABCDEF') for _ in range(6)) for _ in range(10)]

# One toggleable layer per edge. Colours are paired by position, so the pairing does
# not depend on the DataFrame's index labels.
for color, (_, row) in zip(colors, top_edges_df.iterrows()):
    edge_layer = folium.FeatureGroup(name=f"{row['name_x']} to {row['name_y']} - Centrality: {row['centrality']:.4f}")

    # Plot the edge with its own color
    folium.PolyLine(
        locations=[(row['from_lat'], row['from_lon']), (row['to_lat'], row['to_lon'])],
        color=color,
        weight=3,
        tooltip=f"From: {row['name_x']}<br>To: {row['name_y']}<br>Betweenness Centrality: {row['centrality']:.4f}"
    ).add_to(edge_layer)

    # Blue marker = source endpoint, red marker = target endpoint.
    folium.Marker(
        location=(row['from_lat'], row['from_lon']),
        popup=row['name_x'],
        icon=folium.Icon(color="blue")
    ).add_to(edge_layer)
    folium.Marker(
        location=(row['to_lat'], row['to_lon']),
        popup=row['name_y'],
        icon=folium.Icon(color="red")
    ).add_to(edge_layer)

    # Add the edge layer to the map
    edge_layer.add_to(m)

# Add layer control to toggle visibility of edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Edge Betweenness Centrality_map.html")


Inference for Edge Betweenness Centrality:
The directed edges Kona, HI → Greenbrier, WV and Buffalo, NY → Aspen, CO have the highest edge betweenness centrality, meaning that the largest share of weighted shortest paths in the network runs through them. This indicates that they play key roles in facilitating communication or travel between different parts of the network. These connections are pivotal "bridges" that link otherwise distant or less connected regions and are likely critical for ensuring network connectivity.


In [None]:
# Degree Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' replaces the deprecated delim_whitespace=True keyword.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph; 'Weight' is stored on each edge but is not used by
# degree centrality.
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Degree centrality = degree / (n - 1). On a DiGraph the degree is in-degree plus
# out-degree, so scores can exceed 1 (up to 2 for a node linked both ways to all others).
degree_centrality = nx.degree_centrality(G)

# node_id -> human-readable name.
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Ten highest-scoring nodes, highest first.
top_degree_centrality = sorted(degree_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Print the ranking. The header uses the same 25-character name column as the rows
# so the score column lines up under its title.
print("DEGREE CENTRALITY")
print(f"{'NODE NAME':<25} {'Degree Centrality':<20}")
print("-" * 46)
for node, centrality in top_degree_centrality:
    # Fall back to the raw id when a node has no metadata entry.
    node_name = id_to_name.get(node, str(node))
    print(f"{node_name:<25} {centrality:.4f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



DEGREE CENTRALITY
NODE NAME            Degree Centrality   
----------------------------------------
Los Angeles, CA           1.9473
San Francisco, CA         1.9143
Las Vegas, NV             1.8923
Chicago, IL               1.8879
Dallas/Fort Worth, TX     1.8879
Denver, CO                1.8813
New York, NY              1.8813
Washington, DC            1.8725
Phoenix, AZ               1.8615
Seattle/Tacoma, WA        1.8110


In [None]:
# Degree Centrality on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Degree centrality (in-degree + out-degree on a DiGraph); keep the ten largest.
degree_centrality = nx.degree_centrality(G)
top_nodes = sorted(degree_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Convert top nodes to a DataFrame and attach name and coordinates for plotting.
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'centrality'])
top_nodes_df = top_nodes_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id'
)

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# node_id -> {'name', 'latitude', 'longitude'}, built once so each edge endpoint is a
# dict lookup rather than a scan of the metadata frame. The first metadata row wins
# if an id is repeated.
node_lookup = (
    reachability_meta.drop_duplicates(subset='node_id')
    .set_index('node_id')[['name', 'latitude', 'longitude']]
    .to_dict('index')
)

# Locally seeded generator: edge colours stay random-looking but the saved map is
# identical on every run.
edge_color_rng = random.Random(42)

# One toggleable layer per top node, holding its marker and all incident edges.
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Degree Centrality: {row['centrality']:.4f}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Degree Centrality: {row['centrality']:.4f}",
        icon=folium.Icon(color="blue")
    ).add_to(node_layer)

    # Every edge that starts or ends at this node.
    touches_node = ((reachability_data['FromNodeId'] == row['node_id']) |
                    (reachability_data['ToNodeId'] == row['node_id']))
    edges = reachability_data[touches_node]
    for from_id, to_id in zip(edges['FromNodeId'], edges['ToNodeId']):
        from_node = node_lookup.get(from_id)
        to_node = node_lookup.get(to_id)
        if from_node is None or to_node is None:
            # Cannot draw an edge without coordinates for both endpoints.
            continue

        folium.PolyLine(
            locations=[(from_node['latitude'], from_node['longitude']),
                       (to_node['latitude'], to_node['longitude'])],
            color=edge_color_rng.choice(['red', 'green', 'blue', 'orange']),
            weight=2,
            tooltip=f"{from_node['name']} to {to_node['name']}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Degree_Centrality_map.html")


Inference for Degree Centrality: Cities like Los Angeles, CA, and San Francisco, CA, show high degree centrality, signifying these cities are directly connected to many others. (The scores exceed 1 because the graph is directed and both incoming and outgoing links are counted, so the maximum possible value is 2.) This indicates that these cities serve as major hubs within the network, offering high accessibility and acting as significant connection points in terms of direct links to other nodes.

In [None]:
# Closeness Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' replaces the deprecated delim_whitespace=True keyword.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Closeness centrality. No `distance` argument is passed, so path lengths are hop
# counts and the 'Weight' attribute is not used here.
closeness_centrality = nx.closeness_centrality(G)

# node_id -> human-readable name.
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Ten highest-scoring nodes, highest first.
top_closeness_centrality = sorted(closeness_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Print the ranking. The header uses the same 25-character name column as the rows
# so the score column lines up under its title.
print("CLOSENESS CENTRALITY")
print(f"{'NODE NAME':<25} {'Closeness Centrality':<20}")
print("-" * 46)
for node, centrality in top_closeness_centrality:
    # Fall back to the raw id when a node has no metadata entry.
    node_name = id_to_name.get(node, str(node))
    print(f"{node_name:<25} {centrality:.4f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



CLOSENESS CENTRALITY
NODE NAME            Closeness Centrality
----------------------------------------
Los Angeles, CA           0.9743
San Francisco, CA         0.9701
Dallas/Fort Worth, TX     0.9519
Chicago, IL               0.9459
Las Vegas, NV             0.9459
New York, NY              0.9440
Denver, CO                0.9420
Washington, DC            0.9381
Phoenix, AZ               0.9343
Seattle/Tacoma, WA        0.9248


In [None]:
# Closeness Centrality on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Closeness centrality on hop counts (no `distance` argument); keep the ten largest.
closeness_centrality = nx.closeness_centrality(G)
top_nodes = sorted(closeness_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Convert top nodes to a DataFrame and attach name and coordinates for plotting.
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'centrality'])
top_nodes_df = top_nodes_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id'
)

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# node_id -> {'name', 'latitude', 'longitude'}, built once so each edge endpoint is a
# dict lookup rather than a scan of the metadata frame. The first metadata row wins
# if an id is repeated.
node_lookup = (
    reachability_meta.drop_duplicates(subset='node_id')
    .set_index('node_id')[['name', 'latitude', 'longitude']]
    .to_dict('index')
)

# Locally seeded generator: edge colours stay random-looking but the saved map is
# identical on every run.
edge_color_rng = random.Random(42)

# One toggleable layer per top node, holding its marker and all incident edges.
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Closeness Centrality: {row['centrality']:.4f}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Closeness Centrality: {row['centrality']:.4f}",
        icon=folium.Icon(color="blue")
    ).add_to(node_layer)

    # Every edge that starts or ends at this node.
    touches_node = ((reachability_data['FromNodeId'] == row['node_id']) |
                    (reachability_data['ToNodeId'] == row['node_id']))
    edges = reachability_data[touches_node]
    for from_id, to_id in zip(edges['FromNodeId'], edges['ToNodeId']):
        from_node = node_lookup.get(from_id)
        to_node = node_lookup.get(to_id)
        if from_node is None or to_node is None:
            # Cannot draw an edge without coordinates for both endpoints.
            continue

        folium.PolyLine(
            locations=[(from_node['latitude'], from_node['longitude']),
                       (to_node['latitude'], to_node['longitude'])],
            color=edge_color_rng.choice(['red', 'green', 'blue', 'orange']),
            weight=2,
            tooltip=f"{from_node['name']} to {to_node['name']}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Closeness_Centrality_map.html")


Inference for Closeness Centrality: Los Angeles, San Francisco, and Dallas/Fort Worth exhibit the highest closeness centrality scores, suggesting they are well-connected hubs. They are likely in positions that allow them to reach other cities quickly, making them central to this network.

In [None]:
# Eigenvector Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' replaces the deprecated delim_whitespace=True keyword.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Eigenvector centrality by power iteration. No `weight` argument is passed, so every
# edge counts equally; max_iter is raised above the library default to give the
# iteration room to converge.
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)

# node_id -> human-readable name.
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Ten highest-scoring nodes, highest first.
top_eigenvector_centrality = sorted(eigenvector_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Print the ranking. The header uses the same 25-character name column as the rows
# so the score column lines up under its title.
print("EIGENVECTOR CENTRALITY")
print(f"{'NODE NAME':<25} {'Eigenvector Centrality':<20}")
print("-" * 46)
for node, centrality in top_eigenvector_centrality:
    # Fall back to the raw id when a node has no metadata entry.
    node_name = id_to_name.get(node, str(node))
    print(f"{node_name:<25} {centrality:.4f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



EIGENVECTOR CENTRALITY
NODE NAME            Eigenvector Centrality
----------------------------------------
Los Angeles, CA           0.0857
San Francisco, CA         0.0854
Dallas/Fort Worth, TX     0.0851
Las Vegas, NV             0.0848
Chicago, IL               0.0847
Denver, CO                0.0846
Phoenix, AZ               0.0846
Seattle/Tacoma, WA        0.0844
Washington, DC            0.0840
Minneapolis/St Paul, MN   0.0838


In [None]:
# Eigenvector Centrality on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Unweighted eigenvector centrality; keep the ten largest. max_iter=1000 matches the
# iteration budget of the tabulated eigenvector cell, so the power iteration is not
# cut off at the library default before it converges.
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
top_nodes = sorted(eigenvector_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Convert top nodes to a DataFrame and attach name and coordinates for plotting.
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'centrality'])
top_nodes_df = top_nodes_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id'
)

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# node_id -> {'name', 'latitude', 'longitude'}, built once so each edge endpoint is a
# dict lookup rather than a scan of the metadata frame. The first metadata row wins
# if an id is repeated.
node_lookup = (
    reachability_meta.drop_duplicates(subset='node_id')
    .set_index('node_id')[['name', 'latitude', 'longitude']]
    .to_dict('index')
)

# Locally seeded generator: edge colours stay random-looking but the saved map is
# identical on every run.
edge_color_rng = random.Random(42)

# One toggleable layer per top node, holding its marker and all incident edges.
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Eigenvector Centrality: {row['centrality']:.4f}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Eigenvector Centrality: {row['centrality']:.4f}",
        icon=folium.Icon(color="blue")
    ).add_to(node_layer)

    # Every edge that starts or ends at this node.
    touches_node = ((reachability_data['FromNodeId'] == row['node_id']) |
                    (reachability_data['ToNodeId'] == row['node_id']))
    edges = reachability_data[touches_node]
    for from_id, to_id in zip(edges['FromNodeId'], edges['ToNodeId']):
        from_node = node_lookup.get(from_id)
        to_node = node_lookup.get(to_id)
        if from_node is None or to_node is None:
            # Cannot draw an edge without coordinates for both endpoints.
            continue

        folium.PolyLine(
            locations=[(from_node['latitude'], from_node['longitude']),
                       (to_node['latitude'], to_node['longitude'])],
            color=edge_color_rng.choice(['red', 'green', 'blue', 'orange']),
            weight=2,
            tooltip=f"{from_node['name']} to {to_node['name']}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Eigenvector_Centrality_map.html")


Inference for Eigenvector Centrality: Los Angeles, CA, San Francisco, CA, and Dallas/Fort Worth, TX exhibit high eigenvector centrality, suggesting they are not only well-connected but also connected to other high-ranking nodes. This highlights these cities' influence in the network, as they are connected to other central and well-connected hubs, amplifying their importance.

In [None]:
# Betweenness Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' replaces the deprecated delim_whitespace=True keyword.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Node betweenness over weighted shortest paths ('Weight' is used as the path length).
# NOTE(review): weighted shortest-path search assumes non-negative weights —
# confirm that the 'Weight' column satisfies this before trusting the ranking.
betweenness_centrality = nx.betweenness_centrality(G, weight='Weight')

# node_id -> human-readable name.
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Ten highest-scoring nodes, highest first.
top_betweenness_centrality = sorted(betweenness_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Print the ranking. The header uses the same 25-character name column as the rows
# so the score column lines up under its title.
print("BETWEENNESS CENTRALITY")
print(f"{'NODE NAME':<25} {'Betweenness Centrality':<20}")
print("-" * 46)
for node, centrality in top_betweenness_centrality:
    # Fall back to the raw id when a node has no metadata entry.
    node_name = id_to_name.get(node, str(node))
    print(f"{node_name:<25} {centrality:.4f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



BETWEENNESS CENTRALITY
NODE NAME            Betweenness Centrality
----------------------------------------
Kona, HI                  0.9714
Kahului, HI               0.9699
Greenbrier, WV            0.9693
Telluride, CO             0.9690
Victoria, TX              0.9681
Buffalo, NY               0.9670
Manchester, NH            0.9669
Aspen, CO                 0.9648
Hilo, HI                  0.9648
Hartford, CT              0.9642


In [None]:
# Betweenness Centrality on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Weighted node betweenness ('Weight' is used as the path length); keep the ten largest.
betweenness_centrality = nx.betweenness_centrality(G, weight='Weight')
top_nodes = sorted(betweenness_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Convert top nodes to a DataFrame and attach name and coordinates for plotting.
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'centrality'])
top_nodes_df = top_nodes_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id'
)

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# node_id -> {'name', 'latitude', 'longitude'}, built once so each edge endpoint is a
# dict lookup rather than a scan of the metadata frame. The first metadata row wins
# if an id is repeated.
node_lookup = (
    reachability_meta.drop_duplicates(subset='node_id')
    .set_index('node_id')[['name', 'latitude', 'longitude']]
    .to_dict('index')
)

# Locally seeded generator: edge colours stay random-looking but the saved map is
# identical on every run.
edge_color_rng = random.Random(42)

# One toggleable layer per top node, holding its marker and all incident edges.
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Betweenness Centrality: {row['centrality']:.4f}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Betweenness Centrality: {row['centrality']:.4f}",
        icon=folium.Icon(color="blue")
    ).add_to(node_layer)

    # Every edge that starts or ends at this node.
    touches_node = ((reachability_data['FromNodeId'] == row['node_id']) |
                    (reachability_data['ToNodeId'] == row['node_id']))
    edges = reachability_data[touches_node]
    for from_id, to_id in zip(edges['FromNodeId'], edges['ToNodeId']):
        from_node = node_lookup.get(from_id)
        to_node = node_lookup.get(to_id)
        if from_node is None or to_node is None:
            # Cannot draw an edge without coordinates for both endpoints.
            continue

        folium.PolyLine(
            locations=[(from_node['latitude'], from_node['longitude']),
                       (to_node['latitude'], to_node['longitude'])],
            color=edge_color_rng.choice(['red', 'green', 'blue', 'orange']),
            weight=2,
            tooltip=f"{from_node['name']} to {to_node['name']}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Betweenness_Centrality_map.html")


Inference for Betweenness Centrality: Locations like Kona, HI, and Kahului, HI, have high betweenness centrality, suggesting they act as critical transit points or "gateways" within the network. These cities likely facilitate connections between less central parts of the network, functioning as essential passage points for movement and communication, potentially having a large impact on the overall flow across the network.

In [2]:
# Katz Centrality
import pandas as pd
import networkx as nx

# Load the whitespace-delimited edge list and the per-node metadata.
# sep=r'\s+' replaces the deprecated delim_whitespace=True keyword.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph from reachability data, keeping 'Weight' on each edge.
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Katz centrality on the weighted adjacency matrix (direct linear solve).
# NOTE(review): Katz centrality is only well-defined when alpha is below the
# reciprocal of the adjacency matrix's largest eigenvalue — confirm that alpha=0.01
# satisfies this for the weighted graph.
katz_centrality = nx.katz_centrality_numpy(G, weight='Weight', alpha=0.01, beta=1.0)

# Store the score next to each node's metadata (ids missing from the graph get NaN),
# then take the ten largest.
reachability_meta['KatzCentrality'] = reachability_meta['node_id'].map(katz_centrality)
top_katz_centrality = reachability_meta.nlargest(10, 'KatzCentrality')

# Print the ranking as a tab-separated table.
formatted_name = "Katz Centrality"
measure_name = "KatzCentrality"
print("Top 10 Nodes by Katz Centrality:")
print(formatted_name.upper())
print(f"\t{'NODE NAME'.ljust(30)}\t{formatted_name}")
print("\t" + "-" * 50)
for _, row in top_katz_centrality.iterrows():
    print(f"\t{row['name'].ljust(30)}\t{row[measure_name]:.16f}")


  reachability_data = pd.read_csv('reformatted_reachability.txt', delim_whitespace=True)


Top 10 Nodes by Katz Centrality:
KATZ CENTRALITY
	NODE NAME                     	Katz Centrality
	--------------------------------------------------
	Alliance, NE                  	0.2636083315033205
	Kenai, AK                     	0.1840938071323797
	Kearney, NE                   	0.1748748180545355
	Modesto, CA                   	0.1506024645953630
	Nantucket, MA                 	0.1421094658098384
	Kinston, NC                   	0.1395459750630394
	Windsor, ON                   	0.1356999356576483
	Sioux City, IA                	0.1278511318426642
	Johnstown, PA                 	0.1226887372282293
	Liberal, KS                   	0.1123072108683634


In [3]:
# Katz Centrality on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# The separator is a raw string: '\s' in a plain literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach endpoint coordinates to every edge. These are inner joins, so an edge whose
# source or target id is absent from the metadata is dropped before the graph is built.
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='FromNodeId', right_on='node_id'
).rename(columns={'latitude': 'from_lat', 'longitude': 'from_lon'})
reachability_data = reachability_data.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']],
    left_on='ToNodeId', right_on='node_id'
).rename(columns={'latitude': 'to_lat', 'longitude': 'to_lon'})

# Create directed graph
G = nx.from_pandas_edgelist(
    reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph()
)

# Weighted Katz centrality; keep the ten largest.
# NOTE(review): alpha must be below the reciprocal of the adjacency matrix's largest
# eigenvalue for Katz centrality to be well-defined — confirm for alpha=0.01.
katz_centrality = nx.katz_centrality_numpy(G, weight='Weight', alpha=0.01, beta=1.0)
top_nodes = sorted(katz_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

# Convert top nodes to a DataFrame and attach name and coordinates for plotting.
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'katz_centrality'])
top_nodes_df = top_nodes_df.merge(
    reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id'
)

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Overlay the North American country outlines from the Natural Earth shapefile.
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# node_id -> {'name', 'latitude', 'longitude'}, built once so each edge endpoint is a
# dict lookup rather than a scan of the metadata frame. The first metadata row wins
# if an id is repeated.
node_lookup = (
    reachability_meta.drop_duplicates(subset='node_id')
    .set_index('node_id')[['name', 'latitude', 'longitude']]
    .to_dict('index')
)

# Locally seeded generator: edge colours stay random-looking but the saved map is
# identical on every run.
edge_color_rng = random.Random(42)

# One toggleable layer per top node, holding its marker and all incident edges.
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Katz Centrality: {row['katz_centrality']:.4f}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Katz Centrality: {row['katz_centrality']:.4f}",
        icon=folium.Icon(color="green")
    ).add_to(node_layer)

    # Every edge that starts or ends at this node.
    touches_node = ((reachability_data['FromNodeId'] == row['node_id']) |
                    (reachability_data['ToNodeId'] == row['node_id']))
    edges = reachability_data[touches_node]
    for from_id, to_id in zip(edges['FromNodeId'], edges['ToNodeId']):
        from_node = node_lookup.get(from_id)
        to_node = node_lookup.get(to_id)
        if from_node is None or to_node is None:
            # Cannot draw an edge without coordinates for both endpoints.
            continue

        folium.PolyLine(
            locations=[(from_node['latitude'], from_node['longitude']),
                       (to_node['latitude'], to_node['longitude'])],
            color=edge_color_rng.choice(['purple', 'blue', 'yellow', 'orange']),
            weight=2,
            tooltip=f"{from_node['name']} to {to_node['name']}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Katz_Centrality_map.html")


Inference for Katz Centrality: Nodes such as Alliance, NE, and Kenai, AK, have relatively high centrality values, signifying that they are influential not necessarily because of direct connections but due to their proximity to other well-connected nodes. These nodes may not connect to many destinations directly but are effectively placed near key hubs, allowing influence and communication to propagate effectively through them. Cities with high Katz Centrality likely play a role in amplifying or facilitating indirect connections, often bridging outlying areas with more central nodes. As a result, they contribute significantly to the cohesion of the network, helping to integrate more isolated areas by connecting through central nodes.