## GitHub

In [1]:
import pandas as pd

# 读取 repo_field_issue_Q.xlsx 文件
def read_and_process_data(file_path):
    """Load an Excel sheet and keep only the rows with a usable ``field`` value.

    Args:
        file_path: Location of the workbook; passed unchanged to ``pd.read_excel``.

    Returns:
        A DataFrame with the rows whose ``field`` cell is neither missing nor
        equal to 0. The original row index is preserved.
    """
    return (
        pd.read_excel(file_path)
        # First discard rows where ``field`` is missing ...
        .loc[lambda sheet: sheet['field'].notna()]
        # ... then discard rows where ``field`` is 0.
        .loc[lambda sheet: sheet['field'] != 0]
    )

# Group the rows by their field value and compute, per field, the percentage of 2024Q3 values falling into each bucket
def group_and_calculate(df):
    """Compute, per ``field`` group, the share of ``2024Q3`` values in each bucket.

    The buckets are the half-open intervals [0, 15), [15, 30) and [30, inf),
    labelled ``'0-15'``, ``'15-30'`` and ``'30+'``.

    Args:
        df: DataFrame with a ``field`` column (group key) and a ``2024Q3``
            column (numeric values to bucket). It is not modified.

    Returns:
        dict mapping each ``field`` value to a dict ``{label: percentage}``.
        Every inner dict contains all three labels, always in the order
        ``'0-15'``, ``'15-30'``, ``'30+'``.
    """
    # Bucket edges; with right=False each interval includes its left edge only.
    bins = [0, 15, 30, float('inf')]
    labels = ['0-15', '15-30', '30+']

    result = {}

    for field_value, group in df.groupby('field'):
        # Keep the bucket assignment in a local Series instead of writing a new
        # column onto the groupby sub-frame (avoids SettingWithCopyWarning and
        # leaves the caller's data untouched).
        buckets = pd.cut(group['2024Q3'], bins=bins, labels=labels, right=False)

        # Values that fall in no bucket (missing or negative) become NaN and
        # are dropped by value_counts, so they do not enter the denominator.
        shares = buckets.value_counts(normalize=True) * 100

        # value_counts orders by frequency, which made the bucket order differ
        # from group to group; emit the buckets in their natural label order.
        result[field_value] = {
            label: float(shares.get(label, 0.0)) for label in labels
        }

    return result

# 打印统计结果
def print_results(results):
    """Print the bucket percentages of every group as plain text.

    Args:
        results: dict mapping a group name to a dict ``{bucket: percentage}``.

    For each group this prints a ``Field: <name>`` header, one indented line
    per bucket with the percentage rounded to two decimals, and a 40-dash
    separator line.
    """
    separator = "-" * 40
    for name in results:
        shares = results[name]
        print(f"Field: {name}")
        for bucket in shares:
            print(f"  {bucket}: {shares[bucket]:.2f}%")
        print(separator)



In [4]:

# Read the workbook and keep only the rows with a usable field value
file_path = 'data/repo_field_issue_Q.xlsx'
df_filtered = read_and_process_data(file_path)

# Group by field and compute the share of 2024Q3 values in each bucket
results = group_and_calculate(df_filtered)

# Print the per-field breakdown
print_results(results)



Field: Artificial Intelligence
  0-15: 84.04%
  15-30: 9.98%
  30+: 5.99%
----------------------------------------
Field: Big Data
  0-15: 74.14%
  15-30: 22.41%
  30+: 3.45%
----------------------------------------
Field: Block Chain
  0-15: 72.50%
  15-30: 20.00%
  30+: 7.50%
----------------------------------------
Field: Cloud Native
  0-15: 64.82%
  15-30: 23.92%
  30+: 11.27%
----------------------------------------
Field: Database
  0-15: 83.16%
  15-30: 13.48%
  30+: 3.37%
----------------------------------------
Field: Frontend
  0-15: 82.88%
  15-30: 14.41%
  30+: 2.70%
----------------------------------------
Field: Internet of Things
  0-15: 80.00%
  15-30: 17.14%
  30+: 2.86%
----------------------------------------
Field: Operating System
  0-15: 90.70%
  15-30: 4.65%
  30+: 4.65%
----------------------------------------
Field: Registries & Application Delivery
  30+: 100.00%
  0-15: 0.00%
  15-30: 0.00%
----------------------------------------


## Gitee

In [13]:
import pandas as pd


def read_and_process_data(file_path):
    """Load an Excel sheet and keep only the rows with a usable ``company`` value.

    Args:
        file_path: Location of the workbook; passed unchanged to ``pd.read_excel``.

    Returns:
        A DataFrame with the rows whose ``company`` cell is neither missing nor
        equal to 0. The original row index is preserved.
    """
    sheet = pd.read_excel(file_path)

    # Step 1: discard rows that have no company at all.
    with_company = sheet.loc[sheet['company'].notna()]

    # Step 2: discard rows whose company is 0.
    return with_company.loc[with_company['company'] != 0]


def group_and_calculate(df):
    """Compute, per ``company`` group, the share of ``2024Q3`` values in each bucket.

    The buckets are the half-open intervals [0, 5), [5, 10) and [10, inf),
    labelled ``'0-5'``, ``'5-10'`` and ``'10+'``.

    Args:
        df: DataFrame with a ``company`` column (group key) and a ``2024Q3``
            column (numeric values to bucket). It is not modified.

    Returns:
        dict mapping each ``company`` value to a dict ``{label: percentage}``.
        Every inner dict contains all three labels, always in the order
        ``'0-5'``, ``'5-10'``, ``'10+'``.
    """
    # Bucket edges; with right=False each interval includes its left edge only.
    bins = [0, 5, 10, float('inf')]
    labels = ['0-5', '5-10', '10+']

    result = {}

    for company, group in df.groupby('company'):
        # Keep the bucket assignment in a local Series instead of writing a new
        # column onto the groupby sub-frame (avoids SettingWithCopyWarning and
        # leaves the caller's data untouched).
        buckets = pd.cut(group['2024Q3'], bins=bins, labels=labels, right=False)

        # Values that fall in no bucket (missing or negative) become NaN and
        # are dropped by value_counts, so they do not enter the denominator.
        shares = buckets.value_counts(normalize=True) * 100

        # value_counts orders by frequency, which listed '10+' before '5-10'
        # in the printed output; emit the buckets in their natural label order.
        result[company] = {
            label: float(shares.get(label, 0.0)) for label in labels
        }

    return result

# 打印统计结果
def print_results(results, companies=("Huawei", "Alibaba", "openKylin")):
    """Print the bucket percentages of the selected groups as plain text.

    Args:
        results: dict mapping a group name to a dict ``{bucket: percentage}``.
        companies: Group names to print. Defaults to the three companies that
            were previously hard-coded, so existing calls behave as before.
            Pass ``None`` to print every group in ``results``.

    Groups are printed in the iteration order of ``results``. For each selected
    group this prints a ``Field: <name>`` header, one indented line per bucket
    with the percentage rounded to two decimals, and a 40-dash separator line.
    """
    for field_value, category_counts in results.items():
        # Skip groups that were not requested; None means "no filter".
        if companies is not None and field_value not in companies:
            continue
        print(f"Field: {field_value}")
        for category, percentage in category_counts.items():
            print(f"  {category}: {percentage:.2f}%")
        print("-" * 40)



In [14]:

# Read the workbook and keep only the rows with a usable company value
file_path = 'data/repo_field_issue_Q2.xlsx'
df_filtered = read_and_process_data(file_path)

# Group by company and compute the share of 2024Q3 values in each bucket
results = group_and_calculate(df_filtered)

# Print the breakdown for the companies that print_results selects
print_results(results)



Field: Alibaba
  0-5: 99.56%
  10+: 0.27%
  5-10: 0.17%
----------------------------------------
Field: Huawei
  0-5: 95.42%
  10+: 3.30%
  5-10: 1.28%
----------------------------------------
Field: openKylin
  0-5: 59.25%
  10+: 25.79%
  5-10: 14.95%
----------------------------------------
