In [1]:
import praw
import networkx as nx
from pyvis.network import Network
import community as community_louvain
import matplotlib.pyplot as plt
import pandas as pd

# Initialize Reddit API
reddit = praw.Reddit(
    client_id='Your Client Id,
    client_secret='Your Secret Code',
    user_agent='Your Agent Id'
)

def create_network(subreddit_name, num_posts, node_limit):
    subreddit = reddit.subreddit(subreddit_name)
    nodes = set()
    edges = []
    for submission in subreddit.hot(limit=num_posts):
        if submission.author:
            author = submission.author.name
            nodes.add(author)
            submission.comments.replace_more(limit=0)
            for comment in submission.comments.list():
                if comment.author:
                    commenter = comment.author.name
                    nodes.add(commenter)
                    if commenter != author:
                        edges.append((author, commenter))
    return nodes, edges

subreddit_name = 'askreddit'
num_posts = 10
node_limit = 200

nodes, edges = create_network(subreddit_name, num_posts, node_limit)

if nodes and edges:
    G = nx.Graph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)

    partition = community_louvain.best_partition(G)
    num_communities = len(set(partition.values()))
    community_colors = {i: f"hsl({int(360 * i / num_communities)}, 100%, 70%)" for i in range(num_communities)}

    # Compute centrality measures
    pagerank = nx.pagerank(G)
    betweenness = nx.betweenness_centrality(G)
    closeness = nx.closeness_centrality(G)
    eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    clustering_coefficients = nx.clustering(G)

    # Identify top 10 influencers based on PageRank
    influencers = sorted(pagerank, key=pagerank.get, reverse=True)[:10]

    # Create a DataFrame for tabular output
    df = pd.DataFrame({
        "User": list(pagerank.keys()),
        "PageRank": [pagerank[user] for user in pagerank],
        "Betweenness Centrality": [betweenness[user] for user in betweenness],
        "Closeness Centrality": [closeness[user] for user in closeness],
        "Eigenvector Centrality": [eigenvector[user] for user in eigenvector],
        "Clustering Coefficient": [clustering_coefficients[user] for user in clustering_coefficients]
    })

    # Sort DataFrame by PageRank
    df = df.sort_values(by="PageRank", ascending=False)

    # Display the table in a readable format
    print("\nTop 10 Influential Users :")
    print(df.head(10).to_string(index=False))

    # Save the table to a CSV file
    df.to_csv("network_centrality_metrics.csv", index=False)

    influencers_header = "Top Influencers: " + ", ".join(set(influencers))

    # Create network visualization
    net = Network(height="750px", width="100%", bgcolor="#222222", font_color="white", heading=influencers_header)
    net.barnes_hut()

    for node in nodes:
        community_id = partition[node]
        color = community_colors[community_id]
        size = 35 if node in influencers else 15  # Bigger size for influencers
        title = f"Community: {community_id}<br>PageRank: {pagerank[node]:.4f}"
        if node in influencers:
            title += "<br>Influencer"
        net.add_node(node, label=node, title=title, color=color, size=size)

    for edge in edges:
        net.add_edge(edge[0], edge[1])

    # Save the interactive network visualization
    net.save_graph("reddit_network.html")

    # Plot Degree Distribution
    degrees = [G.degree(n) for n in G.nodes()]
    plt.figure(figsize=(10, 5))
    plt.hist(degrees, bins=20, color='skyblue', edgecolor='black')
    plt.xlabel("Degree")
    plt.ylabel("Frequency")
    plt.title("Degree Distribution of Reddit User Network")
    plt.savefig("degree_distribution.png")
    plt.close()

    # # Save all centrality measures to a text file
    # with open("centrality_measures.txt", "w") as f:
    #     f.write(df.to_string(index=False))

else:
    print("No data retrieved from Reddit.")


Version 7.7.1 of praw is outdated. Version 7.8.1 was released Friday October 25, 2024.



Top 10 Influential Users :
                User  PageRank  Betweenness Centrality  Closeness Centrality  Eigenvector Centrality  Clustering Coefficient
         GladCow3176  0.051759                0.214446              0.314395                0.519080                0.000000
            lvlyvlyn  0.050599                0.232526              0.361019                0.435163                0.000000
        nooneshome00  0.049913                0.209630              0.333860                0.061666                0.000000
    No_Jackfruit_890  0.048941                0.205592              0.345267                0.147802                0.000031
     Cloudy_DreamzZz  0.047658                0.200911              0.316130                0.038393                0.000000
          westsydmum  0.046816                0.196908              0.332257                0.027919                0.000000
 Aggressive_Goat2028  0.046469                0.201447              0.369293                0.089