# Distance, assortativity and random walks

In [94]:
import math

import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns

from tqdm import tqdm

### Shortest Paths Matrix

In [134]:
def get_shortest_paths_list(G: nx.Graph) -> list[int]:
    shortest_paths_list = []
    node_list = list(G.nodes)
    for i in tqdm(node_list):
        for j in node_list:
            if i < j:
                shortest_paths_list.append(
                    len(nx.shortest_path(usairport, i, j)) - 1
                )
    return shortest_paths_list

### Degree Distribution

In [107]:
def get_degree_distribution(degree_list):
    hist, bins = np.histogram(
        degree_list, 
        bins=max(degree_list) - min(degree_list),
        density=True
    )
    return dict(zip(bins[:-1], hist))

### Shannon Entropy

In [140]:
get_shannon_entropy = lambda x: np.sum([-x[k]*math.log(x[k], 2) if x[k] > 0 else 0 for k in x])

**Question 1:** compute Average Shortest Path Lenght and Diameter. Use only biggest connected component and remove loops 

In [16]:
hamster_raw = nx.read_edgelist('data/hamsterster.txt')
hamster_raw.remove_edges_from(nx.selfloop_edges(hamster_raw))
hamster_raw = hamster_raw.to_undirected()
components_list = sorted(nx.connected_components(hamster_raw), key=len, reverse=True)
hamster = hamster_raw.subgraph(components_list[0])

In [17]:
avg_shortest_path = nx.average_shortest_path_length(hamster)
diameter = nx.diameter(hamster)

In [18]:
print(f'Average SPL = {avg_shortest_path}; Diameter = {diameter}')

Average SPL = 3.452640184078649; Diameter = 14


**Question 2:** Mean and variance of SPL in the USAirport500 Network

In [142]:
usairport_raw = nx.read_weighted_edgelist("data/USairport500.txt")
components_list = sorted(nx.connected_components(usairport_raw), key=len, reverse=True)
usairport = usairport_raw.subgraph(components_list[0])
shortest_paths_list = get_shortest_paths_list(usairport)
avg_shortest_path = np.mean(shortest_paths_list)
variance_shortest_path = np.var(shortest_paths_list)
print(f'Average SPL = {avg_shortest_path}; Var SPL = {variance_shortest_path}')

  0%|          | 0/500 [00:00<?, ?it/s]

100%|██████████| 500/500 [00:06<00:00, 80.65it/s] 

Average SPL = 2.9910300601202406; Var SPL = 0.8175467946875716





**Question 3:** USAirport500 SPL Shannon Entropy

In [144]:
spl_distribution = get_degree_distribution(shortest_paths_list)
shannon_entropy = get_shannon_entropy(spl_distribution)
print(f'SPL Shannon Entropy = {shannon_entropy}')

SPL Shannon Entropy = 1.8830186387901517


**Question 4:** Advogato Assortativity

In [129]:
advogato_raw = nx.read_weighted_edgelist("data/advogato.txt")
components_list = sorted(nx.connected_components(advogato_raw), key=len, reverse=True)
advogato = advogato_raw.subgraph(components_list[0])

In [131]:
assortativity = nx.degree_assortativity_coefficient(advogato)
print(f'Advogato Assortativity = {assortativity}')

Advogato Assortativity = -0.08455192594199314


**Question 5:** Pearson Correlation between knn(k) and k

In [132]:
word_adjacencies_raw = nx.read_weighted_edgelist("data/word_adjacencies.txt")
components_list = sorted(nx.connected_components(word_adjacencies_raw), key=len, reverse=True)
word_adjacencies = word_adjacencies_raw.subgraph(components_list[0])

In [133]:
assortativity = nx.degree_assortativity_coefficient(word_adjacencies)
print(f'Advogato Assortativity = {assortativity}')

Advogato Assortativity = -0.1293478534390013
