In [1]:
import os
import random

import networkx as nx
import pandas as pd

# Load the data.
# The dataset directory defaults to the original location, but it can be
# overridden with the TWITCH_DATA_DIR environment variable so the notebook is
# not tied to one user's home directory.
data_dir = os.environ.get('TWITCH_DATA_DIR', '/home/achoo/Desktop/GroupHw')
features_path = os.path.join(data_dir, 'large_twitch_features.csv')
edges_path = os.path.join(data_dir, 'large_twitch_edges.csv')

features_df = pd.read_csv(features_path)
edges_df = pd.read_csv(edges_path)

In [2]:
# Build an undirected graph from the edge table: every row of the two id
# columns becomes one edge between the corresponding nodes.
edges = edges_df[['numeric_id_1', 'numeric_id_2']].to_numpy()
G = nx.Graph()
G.add_edges_from(edges)

In [3]:
# Extract the largest connected component (LCC):
# pick the component containing the most nodes, then take an independent copy
# of the subgraph of G induced by those nodes.
largest_cc = max(nx.connected_components(G), key=len)
G_lcc = G.subgraph(largest_cc).copy()

In [4]:
# Node count and mean degree of the LCC. The degree sum of an undirected graph
# is twice its edge count, so the mean degree is 2|E| / |V|.
size_lcc = G_lcc.number_of_nodes()
avg_degree_lcc = 2 * G_lcc.number_of_edges() / size_lcc
# Each new BA node contributes m edges, i.e. roughly half the mean degree
# (truncated to an integer).
m = int(avg_degree_lcc / 2)

In [8]:
# Report the LCC node count, its average degree, and the derived m value
print(f"LCC Size: {size_lcc}, Avg Degree: {avg_degree_lcc}, Edges per new node (m): {m}")

LCC Size: 168114, Avg Degree: 80.86842261798542, Edges per new node (m): 40


In [9]:
# Implement the Barabási-Albert (BA) algorithm
def barabasi_albert_graph(n, m, seed=None):
    """
    Generate a Barabási-Albert graph with n nodes and m edges per new node.

    The graph starts as a complete graph on nodes ``0 .. m`` (so every seed
    node has degree ``m``). Each later node ``m + 1 .. n - 1`` is connected to
    ``m`` distinct existing nodes, drawn with probability proportional to
    their current degree (preferential attachment).

    Parameters
    ----------
    n : int
        Total number of nodes. If ``n <= m + 1`` no nodes are added and the
        ``m + 1`` seed nodes are returned as-is.
    m : int
        Number of edges each newly added node creates.
    seed : int or None, optional
        When given, a private ``random.Random(seed)`` generator is used so the
        result is reproducible. When ``None`` (default) the module-level
        ``random`` generator is used, as before.

    Returns
    -------
    dict[int, set[int]]
        Symmetric adjacency mapping: ``v in graph[u]`` iff ``u in graph[v]``.
    """
    rng = random if seed is None else random.Random(seed)

    # Step 1: fully connected seed network on (m + 1) nodes. Each node is
    # linked to *every* other seed node so the adjacency is symmetric.
    seed_nodes = range(m + 1)
    graph = {node: set(seed_nodes) - {node} for node in seed_nodes}

    # Attachment pool: every node appears once per unit of degree, so a uniform
    # draw from the pool is a degree-proportional draw over nodes.
    all_nodes = list(seed_nodes) * m

    # Step 2: add the remaining nodes one at a time with m connections each.
    for new_node in range(m + 1, n):
        targets = set()
        # Redraw until m distinct targets are found (duplicates are ignored).
        while len(targets) < m:
            targets.add(rng.choice(all_nodes))

        graph[new_node] = set(targets)
        for target in targets:
            graph[target].add(new_node)

        # Each target gained one degree; the new node has degree m.
        all_nodes.extend(targets)
        all_nodes.extend([new_node] * m)

    return graph

In [None]:
# Generate the BA model with LCC size and average degree parameters.
# The generator draws from the module-level `random` RNG, so seed it first to
# make the simulated graph (and every metric derived from it) reproducible.
BA_SEED = 42
random.seed(BA_SEED)
ba_graph = barabasi_albert_graph(size_lcc, m)

In [None]:
# Calculate Metrics for the Simulated BA Model
def calculate_metrics(graph):
    """Calculate basic metrics for an undirected graph represented as an adjacency list.

    Parameters
    ----------
    graph : dict
        Maps each node to the set of its neighbours. Every neighbour must also
        be a key of ``graph``; the clustering value is only meaningful when the
        adjacency is symmetric.

    Returns
    -------
    dict
        ``"Size"``: number of nodes.
        ``"Average Degree"``: mean neighbour-set size (0.0 for an empty graph).
        ``"Clustering Coefficient"``: global clustering coefficient
        (transitivity), i.e. closed connected triplets divided by all connected
        triplets; 0 when the graph has no connected triplet.
    """
    num_nodes = len(graph)
    if num_nodes == 0:
        # Avoid dividing by zero: an empty graph has no degrees and no triplets.
        return {
            "Size": 0,
            "Average Degree": 0.0,
            "Clustering Coefficient": 0
        }

    degree_sum = sum(len(neighbors) for neighbors in graph.values())
    avg_degree = degree_sum / num_nodes

    # For a node of degree k, the sum over its neighbours of shared neighbours
    # counts every closed triplet centred on it twice, and k * (k - 1) counts
    # every connected triplet centred on it twice; the factor 2 cancels in the
    # ratio below.
    closed_triplets = 0
    triplets = 0
    for neighbors in graph.values():
        degree = len(neighbors)
        if degree < 2:
            continue  # a node needs two neighbours to centre a triplet
        closed_triplets += sum(
            len(neighbors.intersection(graph[neighbor])) for neighbor in neighbors
        )
        triplets += degree * (degree - 1)

    clustering_coefficient = closed_triplets / triplets if triplets > 0 else 0

    return {
        "Size": num_nodes,
        "Average Degree": avg_degree,
        "Clustering Coefficient": clustering_coefficient
    }

In [None]:
# Display metrics of the BA model: compute size, average degree, and
# clustering coefficient for the simulated graph, then print the result dict
metrics_ba = calculate_metrics(ba_graph)
print("BA Model Metrics:", metrics_ba)