In [None]:
#Indegree and Outdegree
!pip install dash dash-core-components dash-html-components dash-bootstrap-components dash-table
import pandas as pd
import networkx as nx
import numpy as np
import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objs as go
from scipy.stats import gaussian_kde

# ---- Data -------------------------------------------------------------------
# City metadata table.
metadata = pd.read_csv("reachability-meta.csv")

# Directed reachability graph. Using the header's first token as the comment
# marker is what makes the reader skip the header row of the edge list.
G = nx.read_edgelist(
    "reformatted_reachability.txt",
    create_using=nx.DiGraph,
    nodetype=int,
    data=(('weight', float),),
    comments='FromNodeId',
)

# ---- Summary statistics -----------------------------------------------------
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# Per-node degrees, listed in G.nodes() order.
in_degrees = [deg for _, deg in G.in_degree()]
out_degrees = [deg for _, deg in G.out_degree()]

avg_in_degree = sum(in_degrees) / num_nodes
avg_out_degree = sum(out_degrees) / num_nodes

# One row per node; read by the degree-distribution callback.
degree_data = pd.DataFrame({'In-Degree': in_degrees, 'Out-Degree': out_degrees})

avg_clustering = nx.average_clustering(G)

# Degree correlation (assortativity).
degree_correlation = nx.degree_pearson_correlation_coefficient(G)

# ---- Dashboard layout -------------------------------------------------------
# Rows of the summary table, built from (label, value) pairs.
summary_rows = [
    {"Metric": label, "Value": value}
    for label, value in (
        ("Number of Nodes", num_nodes),
        ("Number of Edges", num_edges),
        ("Average In-Degree", avg_in_degree),
        ("Average Out-Degree", avg_out_degree),
        ("Average Clustering Coefficient", avg_clustering),
        ("Degree Correlation (Assortativity)", degree_correlation),
    )
]

stats_table = dash_table.DataTable(
    columns=[{"name": "Metric", "id": "Metric"}, {"name": "Value", "id": "Value"}],
    data=summary_rows,
    style_cell={'textAlign': 'left', 'padding': '5px'},
    style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
)

# Radio selector whose value is the input of the degree-distribution callback.
degree_selector = dcc.RadioItems(
    id="degree-type",
    options=[
        {"label": "In-Degree", "value": "In-Degree"},
        {"label": "Out-Degree", "value": "Out-Degree"},
    ],
    value="In-Degree",
    labelStyle={'display': 'inline-block'},
)

app = dash.Dash(__name__, external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'])
app.layout = html.Div([
    html.H1("Interactive Network Analysis"),
    html.Div([
        html.H3("Basic Network Statistics"),
        stats_table,
    ]),
    html.Br(),
    html.Div([
        html.H3("Degree Distribution"),
        dcc.Graph(id="degree-distribution"),
        html.Label("Choose Degree Type:"),
        degree_selector,
    ]),
])

# Callbacks for interactivity
@app.callback(
    Output("degree-distribution", "figure"),
    Input("degree-type", "value")
)
def update_degree_distribution(degree_type):
    """Build the degree histogram and its smoothing curve for the selected type.

    Args:
        degree_type: Value of the "degree-type" radio group; expected to be a
            column of ``degree_data`` ("In-Degree" or "Out-Degree"). Any other
            value falls back to "In-Degree".

    Returns:
        A Plotly figure holding the histogram of the selected degree column
        and, when the data has more than one distinct value, a Gaussian-KDE
        curve scaled to the histogram's counts.
    """
    # Honour the radio selection; previously the argument was shadowed by a
    # loop variable and both degree types were always plotted.
    if degree_type not in degree_data.columns:
        degree_type = "In-Degree"

    # Long format with only the selected column, so the legend entry still
    # carries the degree-type name.
    degree_long = degree_data.melt(value_vars=[degree_type],
                                   var_name="Degree Type", value_name="Degree")

    fig = px.histogram(degree_long, x="Degree", color="Degree Type", barmode='overlay',
                       title="Degree Distribution", labels={'Degree': 'Degree'})
    # The y axis of a histogram is not a data column, so it cannot be renamed
    # through `labels`; set its title directly.
    fig.update_layout(bargap=0.2, yaxis_title="Frequency")

    degree_values = degree_long['Degree']
    if degree_values.empty:
        return fig

    # Fix the bin width explicitly so the KDE curve can be scaled to it.
    # Integer-wide bins starting at lowest - 0.5 each cover the same number of
    # integer degree values.
    max_bins = 50
    lowest, highest = int(degree_values.min()), int(degree_values.max())
    bin_width = max(1, int(np.ceil((highest - lowest + 1) / max_bins)))
    fig.update_traces(
        xbins=dict(start=lowest - 0.5, end=highest + 0.5, size=bin_width),
        selector=dict(type='histogram'),
    )

    # gaussian_kde cannot be fitted to constant data, so skip the curve then.
    if degree_values.nunique() > 1:
        x = np.linspace(lowest, highest, 200)
        kde = gaussian_kde(degree_values)
        # density * sample size * bin width = expected count per bin.
        fig.add_trace(go.Scatter(x=x, y=kde(x) * len(degree_values) * bin_width,
                                 mode='lines', name=f'Smoothing {degree_type}',
                                 line=dict(width=2)))

    return fig

# Run the app
if __name__ == '__main__':
    # `run_server` was removed in Dash 3 in favour of `run`. Prefer `run` and
    # fall back to `run_server` only on older releases that lack it.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=False)




<IPython.core.display.Javascript object>

In [None]:
# Correlation between In-Degree and Out-Degree
!pip install dash dash-core-components dash-html-components dash-bootstrap-components dash-table
import pandas as pd
import networkx as nx
import numpy as np
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

# Load the city metadata
metadata = pd.read_csv("reachability-meta.csv")

# node_id -> city name lookup built from the metadata table
node_names = metadata.set_index('node_id')['name'].to_dict()

# Load the directed reachability network; the header line is dropped by
# treating its first token as the comment marker.
G = nx.read_edgelist("reformatted_reachability.txt", create_using=nx.DiGraph,
                     nodetype=int, data=(('weight', float),), comments='FromNodeId')

# In- and out-degree of every node, both listed in G.nodes() order so that
# position i of each list refers to the same node.
indegree_sequence = [d for n, d in G.in_degree()]
outdegree_sequence = [d for n, d in G.out_degree()]

# Hover label per node: the city name, or the node id as text when the
# metadata has no entry for it.
node_labels = {n: node_names.get(n, str(n)) for n in G.nodes()}

# Initialize Dash app
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Interactive Correlation between In-Degree and Out-Degree"),

    dcc.Graph(id='correlation-plot'),

    # Dash style dictionaries use camelCased CSS property names.
    html.Div(id='reciprocity-info', style={'marginTop': '20px'})
])

@app.callback(
    Output('correlation-plot', 'figure'),
    Output('reciprocity-info', 'children'),
    Input('correlation-plot', 'id')  # This input is just a trigger
)
def update_graph(_):
    """Build the in-degree vs. out-degree scatter plot and the reciprocity text.

    Args:
        _: Id of the graph component; unused, it only triggers the callback.

    Returns:
        A ``(figure, text)`` pair: the scatter plot (with a least-squares fit
        line when one can be computed) and a string reporting the graph's
        reciprocity rounded to four decimals.
    """
    fig = go.Figure()

    # One marker per node; the hover box shows the node label and both degrees.
    fig.add_trace(go.Scatter(
        x=indegree_sequence,
        y=outdegree_sequence,
        mode='markers',
        marker=dict(size=10, opacity=0.6, line=dict(width=1, color='white')),
        name='Data Points',
        hoverinfo='text',
        hovertemplate='<b>Node: %{text}</b><br>' +
                      'In-Degree: %{x}<br>' +
                      'Out-Degree: %{y}<br>' +
                      '<extra></extra>',
        text=[node_labels[n] for n in G.nodes()]  # Add node labels for hover
    ))

    # Least-squares line. It is evaluated on the sorted distinct in-degrees so
    # the line trace is drawn once, left to right, instead of retracing itself
    # through every node in arbitrary order. A fit needs at least two distinct
    # x values, so the trace is skipped otherwise.
    x_fit = np.unique(indegree_sequence)
    if x_fit.size >= 2:
        m, b = np.polyfit(np.array(indegree_sequence), np.array(outdegree_sequence), 1)
        fig.add_trace(go.Scatter(
            x=x_fit,
            y=m * x_fit + b,
            mode='lines',
            line=dict(color='red', width=2),
            name='Fit Line'
        ))

    # Calculate the reciprocity of the graph
    reciprocity_value = nx.reciprocity(G)

    # Update figure layout with increased size and modified titles
    fig.update_layout(
        title="The correlation between k_in and k_out in the Airline Route Network",
        xaxis_title="K_in",
        yaxis_title="K_out",
        xaxis=dict(range=[0, max(indegree_sequence) + 50]),
        yaxis=dict(range=[0, max(outdegree_sequence) + 50]),
        template="plotly_white",
        width=1400,
        height=1000
    )

    # Set the aspect ratio to square
    fig.update_yaxes(scaleanchor="x")  # Link y-axis to x-axis for equal scaling
    fig.update_xaxes(constrain='domain')

    return fig, f"Reciprocity (R) = {round(reciprocity_value, 4)}"

# Run the app
if __name__ == '__main__':
    # `run_server` was removed in Dash 3 in favour of `run`. Prefer `run` and
    # fall back to `run_server` only on older releases that lack it.
    start_server = getattr(app, "run", None) or app.run_server
    # Keep the debug tooling, but switch off the auto-reloader: it restarts
    # the process, which a notebook kernel cannot do.
    start_server(debug=True, use_reloader=False)




<IPython.core.display.Javascript object>

In [None]:
# Reachability
import pandas as pd
import networkx as nx

# Load the reachability data as a graph.
# `delim_whitespace=True` is deprecated in pandas; sep=r'\s+' is the
# replacement named by the deprecation warning.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph from reachability data
G = nx.from_pandas_edgelist(reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph())

# Reachability of a node = number of nodes reachable from it along directed
# edges. The shortest-path-length dict also contains the source itself (at
# distance 0), so the count includes the node.
reachability = {node: len(nx.single_source_shortest_path_length(G, node)) for node in G.nodes()}

# Add reachability data to metadata and sort for top 10 nodes.
# Metadata rows whose node_id is not in the graph get NaN from the mapping.
reachability_meta['Reachability'] = reachability_meta['node_id'].map(reachability)
top_reachability = reachability_meta.nlargest(10, 'Reachability')

# Display the results
formatted_name = "Reachability"
measure_name = "Reachability"
print("Top 10 Nodes by Reachability:")
print(f"{formatted_name.upper()}")
print(f"\t{'NODE NAME'.ljust(30)}\t{formatted_name}")
print("\t" + "-"*50)
for _, row in top_reachability.iterrows():
    print(f"\t{row['name'].ljust(30)}\t{row[measure_name]:.16f}")



The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead



Top 10 Nodes by Reachability:
REACHABILITY
	NODE NAME                     	Reachability
	--------------------------------------------------
	Abbotsford, BC                	456.0000000000000000
	Aberdeen, SD                  	456.0000000000000000
	Abilene, TX                   	456.0000000000000000
	Akron/Canton, OH              	456.0000000000000000
	Alamosa, CO                   	456.0000000000000000
	Albany, GA                    	456.0000000000000000
	Albany, NY                    	456.0000000000000000
	Albuquerque, NM               	456.0000000000000000
	Alexandria, LA                	456.0000000000000000
	Allentown/Bethlehem/Easton, PA	456.0000000000000000


In [None]:
# Reachability on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# Raw string: '\s' inside a normal string literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach the name and coordinates of both endpoints to every edge. Each side
# is renamed before merging so the two joins cannot collide on
# 'node_id'/'name' and produce ambiguous '_x'/'_y' columns.
node_info = reachability_meta[['node_id', 'name', 'latitude', 'longitude']]
reachability_data = (
    reachability_data
    .merge(
        node_info.rename(columns={'node_id': 'FromNodeId', 'name': 'from_name',
                                  'latitude': 'from_lat', 'longitude': 'from_lon'}),
        on='FromNodeId',
    )
    .merge(
        node_info.rename(columns={'node_id': 'ToNodeId', 'name': 'to_name',
                                  'latitude': 'to_lat', 'longitude': 'to_lon'}),
        on='ToNodeId',
    )
)

# Create directed graph (only edges whose two endpoints appear in the
# metadata survive the inner merges above).
G = nx.from_pandas_edgelist(reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph())

# Calculate reachability for each node and select top 10 nodes.
# The count includes the source node itself (distance 0).
reachability = {node: len(nx.single_source_shortest_path_length(G, node)) for node in G.nodes()}
top_nodes = sorted(reachability.items(), key=lambda x: x[1], reverse=True)[:10]

# Convert top nodes to DataFrame for plotting
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'reachability'])
top_nodes_df = top_nodes_df.merge(reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id')

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Add base map layer for the Americas
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# Seeded generator so the edge colours are the same on every run.
edge_colors = ['red', 'green', 'blue', 'orange']
color_rng = random.Random(42)

# Create separate layers for each node and edges between them
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - Reachability: {row['reachability']}")

    # Add marker for the node
    folium.Marker(
        location=(row['latitude'], row['longitude']),
        popup=f"{row['name']}<br>Reachability: {row['reachability']}",
        icon=folium.Icon(color="purple")
    ).add_to(node_layer)

    # Edges that start or end at this node. Endpoint names and coordinates
    # come straight from the merged columns, so no per-edge metadata lookup
    # is needed.
    incident_edges = reachability_data[(reachability_data['FromNodeId'] == row['node_id']) |
                                       (reachability_data['ToNodeId'] == row['node_id'])]
    for edge in incident_edges.itertuples(index=False):
        folium.PolyLine(
            locations=[(edge.from_lat, edge.from_lon),
                       (edge.to_lat, edge.to_lon)],
            color=color_rng.choice(edge_colors),
            weight=2,
            tooltip=f"{edge.from_name} to {edge.to_name}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("Reachability_map.html")


Inference for Reachability: The high reachability scores for each node indicate that these locations have broad access to other parts of the network: every listed city reaches 456 nodes (a count that includes the city itself). This suggests that these cities serve as well-connected hubs, making it possible to reach a large portion of the network from these points. Note, however, that all ten listed nodes share exactly the same score, so the "top 10" is a tie rather than a ranking, and the list does not distinguish these cities from one another. Consequently, these locations could be crucial for network accessibility, as they provide multiple paths and connections that facilitate movement and reduce travel limitations across the network. The uniformity in the reachability scores across these nodes also implies a robust and distributed structure where multiple cities maintain similar levels of connectivity, ensuring redundancy and flexibility within the network.

In [2]:
#Page Rank
import pandas as pd
import networkx as nx

# Load the reachability data as a graph.
# `delim_whitespace=True` is deprecated in pandas; sep=r'\s+' is the
# replacement named by the deprecation warning.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Create a directed graph from reachability data
G = nx.from_pandas_edgelist(reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph())

# Calculate PageRank, using the 'Weight' edge attribute as link strength.
# NOTE(review): PageRank presumably expects non-negative weights; confirm
# the sign/meaning of 'Weight' in reformatted_reachability.txt.
pagerank_scores = nx.pagerank(G, weight='Weight')

# Map node IDs to names using reachability-meta.csv
id_to_name = dict(zip(reachability_meta['node_id'], reachability_meta['name']))

# Display the results for top 10 nodes (highest score first); nodes without
# a metadata entry are shown by their id.
top_pagerank = sorted(pagerank_scores.items(), key=lambda x: x[1], reverse=True)[:10]
print("PAGERANK SCORES")
print(f"{'NODE NAME':<20} {'PageRank':<10}")
print("-" * 30)
for node, score in top_pagerank:
    node_name = id_to_name.get(node, str(node))
    print(f"{node_name:<20} {score:.4f}")


  reachability_data = pd.read_csv('reformatted_reachability.txt', delim_whitespace=True)


PAGERANK SCORES
NODE NAME            PageRank  
------------------------------
Honolulu, HI         0.0082
Kahului, HI          0.0072
San Francisco, CA    0.0066
Anchorage, AK        0.0065
Los Angeles, CA      0.0062
Ft. Lauderdale, FL   0.0058
Las Vegas, NV        0.0058
Seattle/Tacoma, WA   0.0058
Portland, OR         0.0057
San Diego, CA        0.0057


In [6]:
#Page Rank on Map
import pandas as pd
import networkx as nx
import folium
import geopandas as gpd
import random

# Load reachability and metadata files.
# Raw string: '\s' inside a normal string literal is an invalid escape sequence.
reachability_data = pd.read_csv('reformatted_reachability.txt', sep=r'\s+')
reachability_meta = pd.read_csv('reachability-meta.csv')

# Attach the name and coordinates of both endpoints to every edge. Each side
# is renamed before merging so the two joins cannot collide on
# 'node_id'/'name' and produce ambiguous '_x'/'_y' columns.
node_info = reachability_meta[['node_id', 'name', 'latitude', 'longitude']]
reachability_data = (
    reachability_data
    .merge(
        node_info.rename(columns={'node_id': 'FromNodeId', 'name': 'from_name',
                                  'latitude': 'from_lat', 'longitude': 'from_lon'}),
        on='FromNodeId',
    )
    .merge(
        node_info.rename(columns={'node_id': 'ToNodeId', 'name': 'to_name',
                                  'latitude': 'to_lat', 'longitude': 'to_lon'}),
        on='ToNodeId',
    )
)

# Create directed graph (only edges whose two endpoints appear in the
# metadata survive the inner merges above).
G = nx.from_pandas_edgelist(reachability_data, 'FromNodeId', 'ToNodeId', ['Weight'], create_using=nx.DiGraph())

# Calculate PageRank scores for each node and select top 10 nodes.
# NOTE(review): PageRank presumably expects non-negative weights; confirm
# the sign/meaning of 'Weight' in reformatted_reachability.txt.
pagerank = nx.pagerank(G, weight='Weight')
top_nodes = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:10]

# Convert top nodes to DataFrame for plotting
top_nodes_df = pd.DataFrame(top_nodes, columns=['node_id', 'pagerank'])
top_nodes_df = top_nodes_df.merge(reachability_meta[['node_id', 'name', 'latitude', 'longitude']], on='node_id')

# Initialize Folium map centered on North America
m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Add base map layer for the Americas
americas_map = gpd.read_file('ne_110m_admin_0_countries.shp')
americas_map = americas_map[americas_map['CONTINENT'] == 'North America']
folium.GeoJson(americas_map).add_to(m)

# Marker radius is proportional to the score, with the best-ranked node drawn
# at max_marker_radius pixels. The former `5 + score * 50` changed the radius
# by only a fraction of a pixel for scores below 0.01.
max_marker_radius = 20
top_score = top_nodes_df['pagerank'].max()

# Seeded generator so the edge colours are the same on every run.
edge_colors = ['red', 'green', 'blue', 'orange']
color_rng = random.Random(42)

# Create separate layers for each node and edges between them
for _, row in top_nodes_df.iterrows():
    node_layer = folium.FeatureGroup(name=f"{row['name']} - PageRank: {row['pagerank']:.4f}")

    # Guard against a zero top score so the division below is always defined.
    if top_score > 0:
        marker_radius = max_marker_radius * row['pagerank'] / top_score
    else:
        marker_radius = max_marker_radius

    # Add marker for the node with size proportional to PageRank
    folium.CircleMarker(
        location=(row['latitude'], row['longitude']),
        radius=marker_radius,
        color="purple",
        fill=True,
        fill_opacity=0.7,
        popup=f"{row['name']}<br>PageRank: {row['pagerank']:.4f}"
    ).add_to(node_layer)

    # Edges that start or end at this node. Endpoint names and coordinates
    # come straight from the merged columns, so no per-edge metadata lookup
    # is needed.
    incident_edges = reachability_data[(reachability_data['FromNodeId'] == row['node_id']) |
                                       (reachability_data['ToNodeId'] == row['node_id'])]
    for edge in incident_edges.itertuples(index=False):
        folium.PolyLine(
            locations=[(edge.from_lat, edge.from_lon),
                       (edge.to_lat, edge.to_lon)],
            color=color_rng.choice(edge_colors),
            weight=2,
            tooltip=f"{edge.from_name} to {edge.to_name}"
        ).add_to(node_layer)

    node_layer.add_to(m)

# Add layer control to toggle visibility of nodes and edges
folium.LayerControl(collapsed=False).add_to(m)

# Save map as HTML
m.save("PageRank_map.html")


Inference for PageRank: The PageRank scores indicate the relative importance of each node within the network, with higher scores suggesting that certain locations are more "influential" in terms of connectivity and accessibility. The leading PageRank scores in this network belong to cities like Honolulu, HI, and Kahului, HI, implying that these cities are key hubs that likely receive significant traffic and serve as vital transit points. Their influence extends to facilitating connections not only within their immediate surroundings but also across broader parts of the network. Although the differences in PageRank are relatively small (the top ten scores range only from 0.0057 to 0.0082), this distribution implies a slightly hierarchical network structure. Cities with higher PageRank scores, such as Honolulu and San Francisco, act as primary connection points, enhancing network cohesion. Meanwhile, cities with lower, yet still similar, PageRank scores (like Las Vegas, NV, and Seattle/Tacoma, WA) serve as secondary hubs. This structure allows for both strong connectivity and resilience within the network, as multiple influential nodes can absorb traffic or connections if others become less accessible.