# Xây dựng đồ thị

In [None]:
import pandas as pd
import networkx as nx

# Build a directed interaction graph from a table of posts and compute
# graph-level and node-level measures on it.
#
# Columns read when building the graph (one row per post):
#   "Author"        - used as the node id of the post's author
#   "Category"      - stored as the `category` attribute of the author node
#   "Comments_Post" - iterable of (commenter, replier) pairs for that post
# The other columns are kept in the frame but are not used below.
#
# NOTE(review): this is a small mock dataset that mirrors that structure.
# Replace it with the real data (e.g. pd.read_csv / pd.read_excel) once the
# file is available. With an empty `data` dict the graph has no nodes, and
# every later cell would be working on an empty graph.
data = {
    "Title": ["Post 1", "Post 2", "Post 3"],
    "Author": ["Author A", "Author B", "Author C"],
    "Category": ["Tech", "Health", "Education"],
    "Comments Count": [2, 3, 1],
    "Detailed Title": ["Detail 1", "Detail 2", "Detail 3"],
    "Comments_Post": [
        [("Commenter A1", "Reply B1"), ("Commenter A2", "Reply B2")],
        [("Commenter B1", "Reply C1"), ("Commenter B2", "Reply D2"), ("Commenter B3", "Reply E3")],
        [("Commenter C1", "Reply F1")],
    ],
}

df = pd.DataFrame(data)

# Directed graph: edges point author -> commenter -> replier.
G = nx.DiGraph()

for _, row in df.iterrows():
    author = row["Author"]
    # The author node carries the post's category.
    # NOTE(review): add_node on an existing node updates its attributes, so a
    # user who appears in several roles keeps the category written last.
    G.add_node(author, category=row["Category"])
    for commenter, replier in row["Comments_Post"]:
        G.add_node(commenter, category="Commenter")
        G.add_node(replier, category="Replier")
        G.add_edge(author, commenter)    # Author to Commenter
        G.add_edge(commenter, replier)   # Commenter to Replier

# Graph-level density plus per-node centralities (dicts keyed by node).
# NOTE(review): on a DiGraph, networkx closeness uses incoming distances by
# default, so pure source nodes (authors here) will score 0 - confirm this is
# the intended reading.
density = nx.density(G)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)

results = {
    "Density": density,
    "Closeness Centrality": closeness,
    "Betweenness Centrality": betweenness,
}

results


# Phân cụm cộng đồng

In [None]:
from networkx.algorithms.community import louvain_communities, girvan_newman

# Detect communities with two algorithms and record, on every node of G, the
# index of the community it was assigned to by each algorithm.

# Louvain is a randomized algorithm: pass a fixed seed so that re-running the
# notebook yields the same partition (same seed value the layout in the
# visualization cell uses).
louvain_communities_result = louvain_communities(G, seed=42)

# node -> index of its Louvain community
louvain_mapping = {
    node: index
    for index, community in enumerate(louvain_communities_result)
    for node in community
}
nx.set_node_attributes(G, louvain_mapping, 'community_louvain')

# Girvan-Newman yields successively finer partitions; only the first split is
# used here.
# NOTE(review): for a graph without edges girvan_newman appears to fall back to
# connected_components, which networkx does not implement for directed graphs,
# so that case is handled explicitly: every node forms its own community.
if G.number_of_edges() == 0:
    girvan_newman_communities = tuple({node} for node in G.nodes())
else:
    girvan_newman_communities = next(girvan_newman(G))  # First split

# node -> index of its Girvan-Newman community
girvan_newman_mapping = {
    node: index
    for index, community in enumerate(girvan_newman_communities)
    for node in community
}
nx.set_node_attributes(G, girvan_newman_mapping, 'community_girvan_newman')

# Show every node together with its attributes (category + both community ids).
node_attributes = {node: G.nodes[node] for node in G.nodes()}
node_attributes


# Phân tích cụm

In [None]:
from collections import defaultdict

# Group users by detected community and summarize which categories occur in
# each community, once per algorithm.

# community id -> list of {"user": ..., "category": ...} records
community_users_louvain = defaultdict(list)
community_users_girvan_newman = defaultdict(list)

for node, attributes in G.nodes(data=True):
    # Nodes without a `category` attribute are reported as "Unknown".
    record = {"user": node, "category": attributes.get("category", "Unknown")}
    # Separate copies so the two result structures never share a dict object.
    community_users_louvain[attributes['community_louvain']].append(dict(record))
    community_users_girvan_newman[attributes['community_girvan_newman']].append(dict(record))


def _categories_per_community(users_by_community):
    """Return {community id: distinct categories of its users, in sorted order}."""
    # Sorting (by string form, so mixed value types cannot break it) keeps the
    # output stable between runs; plain set iteration order is not.
    return {
        community: sorted({user['category'] for user in users}, key=str)
        for community, users in users_by_community.items()
    }


community_topics_louvain = _categories_per_community(community_users_louvain)
community_topics_girvan_newman = _categories_per_community(community_users_girvan_newman)

# Combining results
analysis_result = {
    "Louvain": {
        "Community Users": community_users_louvain,
        "Community Topics": community_topics_louvain,
    },
    "Girvan-Newman": {
        "Community Users": community_users_girvan_newman,
        "Community Topics": community_topics_girvan_newman,
    },
}

# Show the summary through the notebook's own rich display (last expression)
# rather than a sandbox-only helper module, so the cell runs on any kernel.
analysis_df = pd.DataFrame(analysis_result)
analysis_df


# Trực quan hóa

In [None]:
import matplotlib.pyplot as plt

def _draw_communities(graph, layout, node_colors, title):
    """Draw `graph` in a new figure with nodes coloured by community.

    graph       -- the networkx graph to draw
    layout      -- node -> position mapping shared by all figures
    node_colors -- one value per node, in the iteration order of graph.nodes
    title       -- figure title
    """
    plt.figure(figsize=(12, 8))
    nx.draw_networkx_nodes(graph, layout, node_color=node_colors, cmap=plt.cm.tab20, node_size=500)
    nx.draw_networkx_edges(graph, layout, alpha=0.5)
    nx.draw_networkx_labels(graph, layout, font_size=10, font_color='black')
    plt.title(title)
    plt.show()


# One layout, computed once with a fixed seed, so both figures place the nodes
# identically and can be compared side by side.
pos = nx.spring_layout(G, seed=42)  # Positioning the nodes

# Community index of every node, used as the colour value (Louvain).
colors_louvain = [attributes['community_louvain'] for _, attributes in G.nodes(data=True)]
_draw_communities(G, pos, colors_louvain, "Graph Visualization - Louvain Communities")

# Community index of every node, used as the colour value (Girvan-Newman).
colors_girvan_newman = [attributes['community_girvan_newman'] for _, attributes in G.nodes(data=True)]
_draw_communities(G, pos, colors_girvan_newman, "Graph Visualization - Girvan-Newman Communities")
