## Libraries

In [4]:
import pandas as pd
import networkx as nx
import numpy as np

## Functions

In [10]:
def get_metrics(G):
    """Print a summary of structural metrics for a NetworkX graph.

    Reports the mean and variance of the degree sequence, the average
    clustering coefficient, the average shortest path length, the degree
    assortativity coefficient, the standard deviation of the degrees (used
    here as a simple centralization proxy), and the node and edge counts.

    The average shortest path length is only defined on a connected graph.
    When ``G`` is not connected (not strongly connected, for a directed
    graph), the value is computed on the largest component instead and the
    printed label says so, rather than letting NetworkX raise.

    Parameters
    ----------
    G : networkx graph
        Graph to summarize. An empty graph is reported as such because the
        remaining metrics are undefined for it.

    Returns
    -------
    None
        All results are printed.
    """
    n_nodes = G.number_of_nodes()
    n_edges = G.number_of_edges()

    # Nothing below is defined on a graph without nodes (several of the
    # NetworkX calls raise), so report the sizes and stop.
    if n_nodes == 0:
        print("Graph is empty: structural metrics are undefined.")
        print(f"Number of nodes: {n_nodes}")
        print(f"Number of edges: {n_edges}")
        return

    # Degree distribution
    degrees = [d for _, d in G.degree()]
    mean_degree = np.mean(degrees)
    var_degree = np.var(degrees)

    # Average clustering coefficient
    clustering = nx.average_clustering(G)

    # Average shortest path length: pick the connectivity notion that
    # matches the graph type, then fall back to the largest component when
    # the whole graph does not qualify.
    if G.is_directed():
        connected = nx.is_strongly_connected(G)
        components = nx.strongly_connected_components
    else:
        connected = nx.is_connected(G)
        components = nx.connected_components

    if connected:
        path_graph = G
        path_label = "Average path length"
    else:
        largest = max(components(G), key=len)
        path_graph = G.subgraph(largest)
        path_label = (
            "Average path length "
            f"(largest component, {len(largest)} of {n_nodes} nodes)"
        )
    avg_path_length = nx.average_shortest_path_length(path_graph)

    # Assortativity coefficient
    assortativity = nx.degree_assortativity_coefficient(G)

    # Centralization proxy (standard deviation of degrees)
    degree_centralization = np.std(degrees)

    print(f"Mean degree: {mean_degree:.2f}")
    print(f"Degree variance: {var_degree:.2f}")
    print(f"Average clustering coefficient: {clustering:.4f}")
    print(f"{path_label}: {avg_path_length:.4f}")
    print(f"Assortativity coefficient: {assortativity:.4f}")
    print(f"Degree centralization (std): {degree_centralization:.2f}")
    print(f"Number of nodes: {n_nodes}")
    print(f"Number of edges: {n_edges}")

## Load Data

In [2]:
# Read the whitespace-delimited edge list. Lines starting with '#' are
# skipped as comments; no row is used as a header (header=None), so the
# column names are supplied explicitly through `names`.
df = pd.read_csv('p2p-Gnutella04.txt',
                 sep=r'\s+',           # Raw string so the regex \s is not read as a string escape
                 comment='#',          # Skip lines starting with #
                 header=None,          # Don't use any row as header
                 names=['FromNodeId', 'ToNodeId'])  # Set custom column names

# Display first few rows
df.head()

  sep='\s+',            # Matches any whitespace


Unnamed: 0,FromNodeId,ToNodeId
0,0,1
1,0,2
2,0,3
3,0,4
4,0,5


In [6]:
# Collect every node id that appears in either edge-list column

# Collapse the two columns into a single 1-D array of ids
id_list = df.to_numpy().ravel()

# np.unique returns the distinct ids in sorted order
unique_nodes = np.unique(id_list)

print(f"Number of unique nodes: {len(unique_nodes)}")

Number of unique nodes: 10876


In [None]:
# Building the graph

# nx.Graph is undirected, so every (FromNodeId, ToNodeId) row becomes an
# undirected edge and repeated or reversed pairs collapse into one edge.
G = nx.Graph()

# Adding the nodes in one bulk call
G.add_nodes_from(unique_nodes)

# Adding the edges in one bulk call. itertuples yields plain (from, to)
# tuples and avoids building a Series per row as DataFrame.iterrows does,
# which is much slower on a large edge list.
G.add_edges_from(
    df[['FromNodeId', 'ToNodeId']].itertuples(index=False, name=None)
)

Mean degree: 7.35
Degree variance: 48.65
Average clustering coefficient: 0.0062
Average path length: 4.6357
Assortativity coefficient: -0.0132
Degree centralization (std): 6.97


In [11]:
# Display the summary metrics of the graph
get_metrics(G)

Mean degree: 7.35
Degree variance: 48.65
Average clustering coefficient: 0.0062
Average path length: 4.6357
Assortativity coefficient: -0.0132
Degree centralization (std): 6.97
Number of nodes: 10876
Number of edges: 39994
