In [2]:
# 本次所用的数据是 “top300项目仓库的Opendigger指标数据”
# 由于每个项目可能对应多个仓库，且担心部分指标值存在缺漏
# 为了保证最终可视化的一致性，先对项目进行筛选
# 选出只有1个仓库，且具有所需指标值的项目
# 将这些项目的数据添加到总表中

import os
import json
import pymysql

# Connect to the MySQL database. Every setting can be overridden through an
# environment variable so the script can run on another machine without
# editing the source; the fallbacks are the values originally hard-coded here.
# NOTE(review): a real password is committed as the fallback below -- rotate
# it and supply MYSQL_PASSWORD from the environment instead.
conn = pymysql.connect(
    host=os.environ.get('MYSQL_HOST', 'localhost'),
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ.get('MYSQL_PASSWORD', 'cptbtptp789'),
    database=os.environ.get('MYSQL_DATABASE', 'project_data'),
    charset='utf8mb4'
)

# Cursor used to execute the SQL statements issued by this script.
cursor = conn.cursor()

# Destination table for the filtered data: each row holds a project name,
# a metric name and that metric's value stored in a JSON column.
create_table_query = """
CREATE TABLE IF NOT EXISTS filtered_project_metrics (
    id INT AUTO_INCREMENT PRIMARY KEY,
    project_name VARCHAR(255) NOT NULL,
    metric_name VARCHAR(255) NOT NULL,
    metric_value JSON
)
"""
try:
    cursor.execute(create_table_query)
except pymysql.Error as err:
    # Report the failure and carry on; the script is not aborted here.
    print(f"创建表时出错: {err}")
else:
    print("表 filtered_project_metrics 创建成功")


# Metric files that must all be present in a repository folder for the
# project to be accepted.
required_files = [
    'stars.json',
    'technical_fork.json',
    'participants.json',
    'openrank.json',
    'activity.json',
    'attention.json',
    'change_requests.json',
    'issue_comments.json',
    'code_change_lines_sum.json',
]


# Walk the top-level project folders under base_dir. A project is kept only
# when it has exactly one repository sub-folder and that sub-folder contains
# every file listed in required_files.
base_dir = 'C:/Users/22187/Desktop/神探/Python/top_300_metrics/top_300_metrics'  # replace with the actual folder path
sql = "INSERT INTO filtered_project_metrics (project_name, metric_name, metric_value) VALUES (%s, %s, %s)"

try:
    for top_level_folder in os.listdir(base_dir):
        top_level_path = os.path.join(base_dir, top_level_folder)
        if not os.path.isdir(top_level_path):
            continue

        # Exactly one second-level folder means the project has one repository.
        second_level_folders = [
            f for f in os.listdir(top_level_path)
            if os.path.isdir(os.path.join(top_level_path, f))
        ]
        if len(second_level_folders) != 1:
            continue

        second_level_path = os.path.join(top_level_path, second_level_folders[0])
        # Set membership keeps the "all required files present" check cheap.
        existing_files = set(os.listdir(second_level_path))
        if not existing_files.issuperset(required_files):
            continue

        enterprise_name = top_level_folder

        # Parse every metric file before touching the database, so a project
        # with an unreadable file is skipped as a whole instead of being
        # stored with only part of its metrics.
        rows = []
        try:
            for json_file in required_files:
                json_path = os.path.join(second_level_path, json_file)
                metric_name = os.path.splitext(json_file)[0]
                with open(json_path, 'r', encoding='utf-8') as f:
                    metric_value = json.load(f)
                rows.append((enterprise_name, metric_name, json.dumps(metric_value)))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Error decoding JSON file {json_path}: {e}")
            continue

        # Insert all metrics of the project in one transaction: either every
        # row is committed or, on a database error, none of them is.
        try:
            for row in rows:
                metric_name = row[1]  # kept current for the error message below
                cursor.execute(sql, row)
            conn.commit()
        except pymysql.Error as e:
            conn.rollback()
            print(f"Error inserting data for {enterprise_name} - {metric_name}: {e}")
finally:
    # Always release the cursor and the connection, even if the walk fails.
    cursor.close()
    conn.close()



表 filtered_project_metrics 创建成功
