# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

# File paths: absolute, machine-specific locations of the three 2022 CSV inputs
activity_file_path = r"C:\Users\Lenovo\Desktop\数据导论\activity_2022.csv"
china_file_path = r"C:\Users\Lenovo\Desktop\数据导论\china_2022.csv"
global_file_path = r"C:\Users\Lenovo\Desktop\数据导论\global_2022.csv"

# 1. Average growth rate of the ten best-ranked projects
def calculate_avg_growth_rate(file_path):
    """Return the mean percentage growth of the ten best-ranked rows of a CSV.

    The CSV must contain the columns ``Rank``, ``20221_value`` and
    ``202212_value``. The ten rows with the smallest ``Rank`` are selected and
    each row's growth is computed as
    ``(202212_value - 20221_value) / 20221_value * 100``.

    Rows whose ``20221_value`` baseline is zero have no defined growth rate;
    they are excluded from the average instead of turning it into ``inf``.

    NOTE(review): no country filter is applied here -- the file is presumably
    already restricted to the US projects; confirm against the data set.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The mean growth rate in percent, or NaN when no selected row has a
        usable (non-zero, non-missing) baseline.
    """
    activity_data = pd.read_csv(file_path)
    top_10 = activity_data.nsmallest(10, 'Rank')

    baseline = top_10['20221_value']
    final = top_10['202212_value']

    # Mask zero baselines with NaN: dividing by zero would produce +/-inf,
    # and a single inf makes the whole mean inf. NaN entries are skipped by
    # Series.mean().
    safe_baseline = baseline.where(baseline != 0)

    # Work on a standalone Series rather than assigning a new column onto the
    # frame returned by nsmallest().
    growth_rate = (final - baseline) / safe_baseline * 100
    return growth_rate.mean()

# 2. Compare the China data set with the global data set
def compare_china_global(china_file, global_file):
    """Return column-wise mean and standard deviation for two CSV files.

    Only numeric columns are summarised. Text columns (for example a company
    name) are skipped explicitly, because ``DataFrame.mean()`` and
    ``DataFrame.std()`` raise ``TypeError`` on non-numeric columns in
    pandas >= 2.0 when ``numeric_only`` is left at its default.

    NOTE(review): no top-ten selection happens here -- each file is presumably
    already limited to the ten enterprises to compare; confirm with the data.

    Args:
        china_file: Path of the CSV with the China data.
        global_file: Path of the CSV with the global data.

    Returns:
        A dict with the keys ``"China Mean"``, ``"Global Mean"``,
        ``"China Std"`` and ``"Global Std"``; each value is a pandas Series
        indexed by the numeric column names of the corresponding file.
    """
    china_data = pd.read_csv(china_file)
    global_data = pd.read_csv(global_file)

    stats = {
        "China Mean": china_data.mean(numeric_only=True),
        "Global Mean": global_data.mean(numeric_only=True),
        "China Std": china_data.std(numeric_only=True),
        "Global Std": global_data.std(numeric_only=True),
    }
    return stats

# 3. Bayes' theorem exercises
def bayes_theorem():
    """Solve two fixed Bayes' theorem exercises and return both posteriors.

    Exercise 1: a project is non-software with probability 0.25 and software
    with probability 0.75; it carries HTML/Markdown tags with probability 0.85
    in the first case and 0.10 in the second. The result is the probability
    that a tagged project is non-software.

    Exercise 2: JavaScript is used with probability 0.35 by tool-component
    projects and 0.10 by the others, with both groups weighted 0.50. The
    result is the probability that a JavaScript project is a tool component.

    Returns:
        A tuple ``(exercise_1_posterior, exercise_2_posterior)`` of floats.
    """

    def posterior(prior, likelihood, other_prior, other_likelihood):
        # Law of total probability: P(B) = P(B|A)P(A) + P(B|not A)P(not A)
        evidence = likelihood * prior + other_likelihood * other_prior
        # Bayes' rule: P(A|B) = P(B|A)P(A) / P(B)
        return (likelihood * prior) / evidence

    # Exercise 1: non-software project given HTML/Markdown tags
    non_software_given_tags = posterior(
        prior=0.25,
        likelihood=0.85,
        other_prior=0.75,
        other_likelihood=0.10,
    )

    # Exercise 2: tool-component project given JavaScript
    tool_component_given_js = posterior(
        prior=0.50,
        likelihood=0.35,
        other_prior=0.50,
        other_likelihood=0.10,
    )

    return non_software_given_tags, tool_component_given_js

# 4. Undirected graph visualization
def visualize_undirected_graph():
    """Draw the undirected four-node cycle 1-2-3-4-1 and show the figure."""
    node_ids = [1, 2, 3, 4]
    links = [(1, 2), (2, 3), (3, 4), (4, 1)]

    graph = nx.Graph()
    graph.add_nodes_from(node_ids)
    graph.add_edges_from(links)

    # Drawing options collected in one place, then passed on to nx.draw.
    draw_options = {
        "with_labels": True,
        "node_color": 'lightblue',
        "node_size": 500,
        "font_size": 15,
    }
    nx.draw(graph, **draw_options)
    plt.title("Undirected Graph")
    plt.show()

# 5. Directed graph visualization
def visualize_directed_graph():
    """Draw a fixed ten-node directed graph (nodes 0-9) and show the figure."""
    node_ids = list(range(10))
    arcs = [
        (0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (1, 3), (2, 3),
        (3, 4), (5, 4), (5, 6), (7, 5), (6, 8), (8, 7), (8, 9),
    ]

    digraph = nx.DiGraph()
    digraph.add_nodes_from(node_ids)
    digraph.add_edges_from(arcs)

    nx.draw(
        digraph,
        with_labels=True,
        node_color='lightgreen',
        node_size=500,
        font_size=10,
    )
    plt.title("Directed Graph")
    plt.show()

# 6. PageRank computation and visualization
def visualize_pagerank():
    """Compute PageRank on a fixed ten-node directed graph and draw it.

    Each node is drawn with a size of 5000 times its PageRank score, so
    higher-ranked nodes appear larger.

    Returns:
        The result of ``nx.pagerank`` (called with its default settings),
        indexable by node.
    """
    node_ids = list(range(10))
    arcs = [
        (0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (1, 3), (2, 3),
        (3, 4), (5, 4), (5, 6), (7, 5), (6, 8), (8, 7), (8, 9),
    ]

    digraph = nx.DiGraph()
    digraph.add_nodes_from(node_ids)
    digraph.add_edges_from(arcs)

    scores = nx.pagerank(digraph)

    # One marker size per node, in the graph's own node order.
    marker_sizes = []
    for node in digraph.nodes():
        marker_sizes.append(scores[node] * 5000)

    nx.draw(
        digraph,
        with_labels=True,
        node_color='lightcoral',
        node_size=marker_sizes,
        font_size=10,
    )
    plt.title("PageRank Visualization")
    plt.show()
    return scores

# Run every task in order and print its result
avg_growth_rate = calculate_avg_growth_rate(activity_file_path)
print("美国排名前十项目的平均增长率:", avg_growth_rate)

stats = compare_china_global(china_file_path, global_file_path)
# The statistics compare the China file with the global file (not China with
# the US), so the printed label names those two data sets.
print("中国与全球企业数据对比统计:", stats)

P_HTML, P_JS = bayes_theorem()
print("带有 HTML 标签是非软件型的概率:", P_HTML)
print("由 JavaScript 编写是工具组件型的概率:", P_JS)

# Each call opens a figure window via plt.show()
visualize_undirected_graph()
visualize_directed_graph()

pagerank_results = visualize_pagerank()
print("PageRank 结果:", pagerank_results)


# Output of the run above: ModuleNotFoundError: No module named 'pandas'