In [1]:
import pandas as pd
import numpy as np


# 读取 Excel 文件
df = pd.read_excel('field_activity_M.xlsx')


# 保留所需字段和 2023-12 至 2024-11 的所有字段
columns_to_keep = ['id', 'platform', 'repo_name', 'company'] + [col for col in df.columns if '2023-12' <= col <= '2024-11']
df_filtered = df[columns_to_keep]



fields_to_keep = ['Huawei', 'Alibaba', 'openKylin']
df_filtered = df_filtered[df_filtered['company'].isin(fields_to_keep)]


# 重置索引，避免 'field' 成为索引
df_filtered = df_filtered.reset_index(drop=True)


# 处理缺失数据
def handle_missing_data(group):
    # 将 0 视为缺失值，将其替换为 NaN
    group.loc[:, '2023-12':'2024-11'] = group.loc[:, '2023-12':'2024-11'].replace(0, np.nan)
    # 计算 2023-12 至 2024-11 期间缺失数据的个数
    missing_count = group.loc[:, '2023-12':'2024-11'].isna().sum(axis=1)
    # 若缺失数据个数 <= 3，则插值处理，否则删除该条数据
    group = group[missing_count <= 3]
    # 插值处理：线性插值
    group.loc[:, '2023-12':'2024-11'] = group.loc[:, '2023-12':'2024-11'].interpolate(method='linear', limit_direction='both')
    return group



df_filtered = df_filtered.groupby('company', group_keys=False).apply(handle_missing_data)


# 计算每个分组的平均值，保留两位小数
def mean_2023_2024(group):
    return group.loc[:, '2023-12':'2024-11'].mean().round(2)


df_avg = df_filtered.groupby('company', group_keys=False).apply(mean_2023_2024)


# 计算 2023-12 至 2024-11 所有列的总活跃度
df_filtered['total_activity'] = df_filtered.loc[:, '2023-12':'2024-11'].sum(axis=1)


# 根据总活跃度排序，选择前 5 名
df_top_5 = df_filtered.groupby('company', group_keys=False).apply(lambda x: x.nlargest(5, 'total_activity'))


# 输出结果
print("分组平均值：")
print(df_avg)
print("\n每组 top 5：")
print(df_top_5)

分组平均值：
           2023-12  2024-01  2024-02  2024-03  2024-04  2024-05  2024-06  \
company                                                                    
Alibaba       8.61     5.33     5.99    12.05    12.06     7.24     5.10   
Huawei        5.85     5.04     4.56     6.40     6.37     5.83     5.73   
openKylin    24.58    20.36    16.50    25.34    29.31    25.39    24.14   

           2024-07  2024-08  2024-09  2024-10  2024-11  
company                                                 
Alibaba       5.90     7.05     4.73     4.22     3.57  
Huawei        5.24     5.69     5.79     4.71     4.62  
openKylin    26.76    28.47    28.53    24.71    27.46  

每组 top 5：
             id platform                             repo_name    company  \
13619  23271574    gitee           anolis-challenge/summer2022    Alibaba   
13663  23397622    gitee                 src-openeuler/buildah    Alibaba   
15728   6959873    gitee                 src-openeuler/libtiff    Alibaba   
15154   

In [2]:

df_avg

Unnamed: 0_level_0,2023-12,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Alibaba,8.61,5.33,5.99,12.05,12.06,7.24,5.1,5.9,7.05,4.73,4.22,3.57
Huawei,5.85,5.04,4.56,6.4,6.37,5.83,5.73,5.24,5.69,5.79,4.71,4.62
openKylin,24.58,20.36,16.5,25.34,29.31,25.39,24.14,26.76,28.47,28.53,24.71,27.46


In [3]:
df_top_5

Unnamed: 0,id,platform,repo_name,company,2023-12,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,total_activity
13619,23271574,gitee,anolis-challenge/summer2022,Alibaba,9.5,8.55,15.1,42.03,48.18,13.23,12.95,14.44,12.52,9.72,11.75,16.5,214.47
13663,23397622,gitee,src-openeuler/buildah,Alibaba,13.375,8.61,11.3,11.24,16.14,7.625,4.25,1.95,3.38,6.963333,3.97,1.81,90.613333
15728,6959873,gitee,src-openeuler/libtiff,Alibaba,16.49,5.31,0.72,2.77,3.5,12.24,2.9,14.7,20.64,6.07,2.758,1.43,89.528
15154,9932270,gitee,src-openeuler/python-jwcrypto,Alibaba,17.25,4.6,0.72,15.61,4.98,2.02,1.43,2.02,6.8,4.206667,3.566,2.05,65.252667
17121,11572642,gitee,src-openeuler/git-lfs,Alibaba,2.22,1.78,7.64,4.64,6.67,4.23,2.97,2.67,2.02,3.97,1.95,0.72,41.48
4036,13516456,gitee,src-openeuler/chromium,Huawei,13.01,19.81,14.34,29.8,9.32,9.35,10.31,12.41,13.22,37.91,13.07,7.69,190.24
1484,15736142,gitee,src-openeuler/ffmpeg,Huawei,4.69,4.02,11.84,2.02,18.89,5.59,44.03,43.7,23.22,16.04,4.4,11.13,189.57
1407,7117195,gitee,openeuler/openeuler-jenkins,Huawei,13.05,19.76,9.16,11.24,18.82,7.41,5.96,19.93,26.33,9.88,6.45,12.23,160.22
1153,7428316,gitee,src-openeuler/wireshark,Huawei,2.74,16.73,6.32,14.54,3.9,18.01,4.3,3.19,18.97,9.05,26.62,12.94,137.31
1531,12758843,gitee,mindspore/serving,Huawei,20.11,29.55,11.73,21.89,2.47,9.11,2.68,4.75,5.37,6.26,12.72,9.57,136.21
