In [16]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(42)
theme_name = "Cultural Engagement Distribution by Category (2024)"
big_theme_name = "Cultural Trends and Influences"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Cultural Participation": {
                    "children": {
                        "Art Festivals": {
                            "children": {
                                "Local Festivals": {
                                    "children": {
                                        "Community Parades": {"children": {}},
                                        "Street Performances": {"children": {}},
                                        "Open-air Exhibitions": {"children": {}}
                                    }
                                },
                                "International Events": {
                                    "children": {
                                        "Biennale Showcases": {"children": {}},
                                        "Global Film Weeks": {"children": {}},
                                        "Cultural Diplomacy Days": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Heritage Engagement": {
                            "children": {
                                "Museum Visits": {
                                    "children": {
                                        "History Museums": {"children": {}},
                                        "Art Museums": {"children": {}},
                                        "Science Centers": {"children": {}}
                                    }
                                },
                                "Site Tours": {
                                    "children": {
                                        "Ancient Temples": {"children": {}},
                                        "Historic Villages": {"children": {}},
                                        "UNESCO Sites": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Creative Expression": {
                    "children": {
                        "Performing Arts": {
                            "children": {
                                "Theatre": {
                                    "children": {
                                        "Drama": {"children": {}},
                                        "Opera": {"children": {}},
                                        "Experimental Theatre": {"children": {}}
                                    }
                                },
                                "Dance": {
                                    "children": {
                                        "Ballet": {"children": {}},
                                        "Folk Dance": {"children": {}},
                                        "Contemporary": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Visual Arts": {
                            "children": {
                                "Painting": {
                                    "children": {
                                        "Oil Painting": {"children": {}},
                                        "Watercolor": {"children": {}},
                                        "Ink Art": {"children": {}}
                                    }
                                },
                                "Sculpture": {
                                    "children": {
                                        "Stone Sculpture": {"children": {}},
                                        "Metal Works": {"children": {}},
                                        "Public Installations": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Digital Culture": {
                    "children": {
                        "Online Exhibitions": {
                            "children": {
                                "Virtual Museums": {
                                    "children": {
                                        "3D Tours": {"children": {}},
                                        "AI Curation": {"children": {}},
                                        "Remote Participation": {"children": {}}
                                    }
                                },
                                "NFT Artworks": {
                                    "children": {
                                        "CryptoArt": {"children": {}},
                                        "Token Galleries": {"children": {}},
                                        "AR Installations": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Streaming Culture": {
                            "children": {
                                "Live Performances": {
                                    "children": {
                                        "Concert Streams": {"children": {}},
                                        "Online Plays": {"children": {}},
                                        "Dance Broadcasts": {"children": {}}
                                    }
                                },
                                "Cultural Podcasts": {
                                    "children": {
                                        "Art Talks": {"children": {}},
                                        "Storytelling Shows": {"children": {}},
                                        "Creative Interviews": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

GLOBAL_VALUE_RECORD = {}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False

def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue


        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(1000)

✅ 数据已保存至 ./csv/sunburst\sunburst_Cultural Trends and Influences_1.csv

📄 生成的数据样例：
                   parent                   child  value
0                    Root     Creative Expression  21.69
1     Creative Expression             Visual Arts   8.93
2     Creative Expression         Performing Arts  12.76
3         Performing Arts                 Theatre   5.68
4         Performing Arts                   Dance   7.08
5                    Root  Cultural Participation  41.46
6  Cultural Participation           Art Festivals  11.88
7           Art Festivals    International Events   6.68
8           Art Festivals         Local Festivals   5.20
9  Cultural Participation     Heritage Engagement  29.58
✅ 数据已保存至 ./csv/sunburst\sunburst_Cultural Trends and Influences_2.csv

📄 生成的数据样例：
                   parent                   child  value
0                    Root         Digital Culture  38.35
1         Digital Culture       Streaming Culture  28.63
2       Streaming Culture       Cultur

In [17]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(22)
theme_name = "Social Media Distribution by Category (2022)"
big_theme_name  =  "Social Media and Digital Media and Streaming"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Platforms": {
                    "children": {
                        "Social Networks": {
                            "children": {
                                "Facebook": {
                                    "children": {
                                        "Pages": {"children": {}},
                                        "Groups": {"children": {}},
                                        "Marketplace": {"children": {}}
                                    }
                                },
                                "Instagram": {
                                    "children": {
                                        "Stories": {"children": {}},
                                        "Reels": {"children": {}},
                                        "Shop": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Messaging Apps": {
                            "children": {
                                "WhatsApp": {
                                    "children": {
                                        "Status": {"children": {}},
                                        "Channels": {"children": {}}
                                    }
                                },
                                "Telegram": {
                                    "children": {
                                        "Public Channels": {"children": {}},
                                        "Bots": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Content Creation": {
                    "children": {
                        "Video Platforms": {
                            "children": {
                                "YouTube": {
                                    "children": {
                                        "Shorts": {"children": {}},
                                        "Live Streams": {"children": {}},
                                        "Playlists": {"children": {}}
                                    }
                                },
                                "TikTok": {
                                    "children": {
                                        "Challenges": {"children": {}},
                                        "Duets": {"children": {}},
                                        "Brand Collaborations": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Podcasting": {
                            "children": {
                                "Spotify": {
                                    "children": {
                                        "Originals": {"children": {}},
                                        "User Uploads": {"children": {}}
                                    }
                                },
                                "Apple Podcasts": {
                                    "children": {
                                        "Curated Lists": {"children": {}},
                                        "Educational Series": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "User Engagement": {
                    "children": {
                        "Reactions and Comments": {
                            "children": {
                                "Likes": {
                                    "children": {
                                        "Hearts": {"children": {}},
                                        "Emojis": {"children": {}}
                                    }
                                },
                                "Comments": {
                                    "children": {
                                        "Threads": {"children": {}},
                                        "Pinned Comments": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Sharing and Virality": {
                            "children": {
                                "Reposts": {
                                    "children": {
                                        "Stories Share": {"children": {}},
                                        "Link Shares": {"children": {}}
                                    }
                                },
                                "Memes": {
                                    "children": {
                                        "Trends": {"children": {}},
                                        "Remixes": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False


GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Social Media and Digital Media and Streaming_1.csv

📄 生成的数据样例：
                 parent                 child  value
0                  Root      Content Creation  20.14
1      Content Creation            Podcasting   8.95
2      Content Creation       Video Platforms  11.19
3       Video Platforms                TikTok   6.02
4       Video Platforms               YouTube   5.17
5                  Root       User Engagement  58.67
6       User Engagement  Sharing and Virality  52.48
7  Sharing and Virality               Reposts  32.11
8               Reposts           Link Shares  10.29
9               Reposts         Stories Share  21.82
✅ 数据已保存至 ./csv/sunburst\sunburst_Social Media and Digital Media and Streaming_2.csv

📄 生成的数据样例：
                 parent                   child  value
0                  Root         User Engagement  36.55
1       User Engagement  Reactions and Comments   5.29
2       User Engagement    Sharing and Virality  31.26
3  Sh

In [18]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(12)
# Energy主题
theme_name = "Energy Distribution by Category (2020)"
big_theme_name = "Energy and Utilities"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Energy Sources": {
                    "children": {
                        "Fossil Fuels": {
                            "children": {
                                "Coal": {
                                    "children": {
                                        "Surface Mining": {"children": {}},
                                        "Underground Mining": {"children": {}}
                                    }
                                },
                                "Oil": {
                                    "children": {
                                        "Onshore Drilling": {"children": {}},
                                        "Offshore Platforms": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Renewables": {
                            "children": {
                                "Solar": {
                                    "children": {
                                        "Photovoltaic": {"children": {}},
                                        "Solar Thermal": {"children": {}},
                                        "Floating Solar": {"children": {}}
                                    }
                                },
                                "Wind": {
                                    "children": {
                                        "Onshore Wind": {"children": {}},
                                        "Offshore Wind": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Energy Infrastructure": {
                    "children": {
                        "Generation": {
                            "children": {
                                "Thermal Plants": {
                                    "children": {
                                        "Gas Turbines": {"children": {}},
                                        "Steam Boilers": {"children": {}}
                                    }
                                },
                                "Hydropower": {
                                    "children": {
                                        "Dams": {"children": {}},
                                        "Run-of-River": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Transmission": {
                            "children": {
                                "High Voltage Lines": {
                                    "children": {
                                        "AC Lines": {"children": {}},
                                        "DC Lines": {"children": {}}
                                    }
                                },
                                "Smart Grids": {
                                    "children": {
                                        "Sensors": {"children": {}},
                                        "Control Centers": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Energy Consumption": {
                    "children": {
                        "Residential": {
                            "children": {
                                "Heating": {
                                    "children": {
                                        "Gas Heating": {"children": {}},
                                        "Electric Heating": {"children": {}}
                                    }
                                },
                                "Appliances": {
                                    "children": {
                                        "Refrigerators": {"children": {}},
                                        "Washing Machines": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Industrial": {
                            "children": {
                                "Manufacturing": {
                                    "children": {
                                        "Steel Production": {"children": {}},
                                        "Cement Kilns": {"children": {}}
                                    }
                                },
                                "Mining": {
                                    "children": {
                                        "Ore Processing": {"children": {}},
                                        "Pumping Systems": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False


GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(1000)

✅ 数据已保存至 ./csv/sunburst\sunburst_Energy and Utilities_1.csv

📄 生成的数据样例：
           parent           child  value
0            Root  Energy Sources  38.45
1  Energy Sources      Renewables  30.50
2      Renewables           Solar  16.49
3           Solar   Solar Thermal   5.89
4           Solar    Photovoltaic   5.13
5           Solar  Floating Solar   5.47
6      Renewables            Wind  14.01
7            Wind   Offshore Wind   7.56
8            Wind    Onshore Wind   6.45
9  Energy Sources    Fossil Fuels   7.95
✅ 数据已保存至 ./csv/sunburst\sunburst_Energy and Utilities_2.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root  Energy Infrastructure  36.59
1  Energy Infrastructure             Generation  20.55
2             Generation         Thermal Plants  14.52
3         Thermal Plants          Steam Boilers   7.12
4         Thermal Plants           Gas Turbines   7.40
5             Generation             Hydropower   6.03
6  Energy Infrastruc

In [19]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(52)

# Agriculture Production主题
theme_name = "Agriculture Production Distribution by Category (2024)"
big_theme_name = "Agriculture and Food Production"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Crop Production": {
                    "children": {
                        "Grains": {
                            "children": {
                                "Wheat": {
                                    "children": {
                                        "Spring Wheat": {"children": {}},
                                        "Winter Wheat": {"children": {}}
                                    }
                                },
                                "Rice": {
                                    "children": {
                                        "Irrigated": {"children": {}},
                                        "Rainfed": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Oilseeds": {
                            "children": {
                                "Soybean": {
                                    "children": {
                                        "Conventional": {"children": {}},
                                        "GMO": {"children": {}}
                                    }
                                },
                                "Canola": {
                                    "children": {
                                        "Hybrid Varieties": {"children": {}},
                                        "Open-Pollinated": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Livestock": {
                    "children": {
                        "Cattle": {
                            "children": {
                                "Beef": {
                                    "children": {
                                        "Feedlot": {"children": {}},
                                        "Pasture": {"children": {}}
                                    }
                                },
                                "Dairy": {
                                    "children": {
                                        "Holstein": {"children": {}},
                                        "Jersey": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Poultry": {
                            "children": {
                                "Broilers": {
                                    "children": {
                                        "Intensive": {"children": {}},
                                        "Free-Range": {"children": {}}
                                    }
                                },
                                "Egg Layers": {
                                    "children": {
                                        "Caged": {"children": {}},
                                        "Cage-Free": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Agricultural Inputs": {
                    "children": {
                        "Seeds": {
                            "children": {
                                "Hybrid Seeds": {
                                    "children": {
                                        "Maize Hybrids": {"children": {}},
                                        "Vegetable Hybrids": {"children": {}}
                                    }
                                },
                                "Heirloom Seeds": {
                                    "children": {
                                        "Tomato": {"children": {}},
                                        "Chili Pepper": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Fertilizers": {
                            "children": {
                                "Nitrogen": {
                                    "children": {
                                        "Urea": {"children": {}},
                                        "Ammonium Nitrate": {"children": {}}
                                    }
                                },
                                "Organic": {
                                    "children": {
                                        "Compost": {"children": {}},
                                        "Manure": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False


GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Agriculture and Food Production_1.csv

📄 生成的数据样例：
                parent                child  value
0                 Root  Agricultural Inputs  37.78
1  Agricultural Inputs                Seeds   9.78
2  Agricultural Inputs          Fertilizers  28.00
3          Fertilizers             Nitrogen  11.22
4             Nitrogen                 Urea   6.18
5             Nitrogen     Ammonium Nitrate   5.04
6          Fertilizers              Organic  16.78
7              Organic              Compost   9.61
8              Organic               Manure   7.17
9                 Root      Crop Production  26.03
✅ 数据已保存至 ./csv/sunburst\sunburst_Agriculture and Food Production_2.csv

📄 生成的数据样例：
      parent            child  value
0       Root        Livestock  32.81
1  Livestock          Poultry  11.84
2    Poultry         Broilers   5.74
3    Poultry       Egg Layers   6.10
4  Livestock           Cattle  20.97
5     Cattle            Dairy   8.91
6     Cattle  

In [20]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(35)

# Science主题
theme_name = "Science Distribution by Category (2021)"
big_theme_name = "Science and Engineering"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Physical Sciences": {
                    "children": {
                        "Physics": {
                            "children": {
                                "Mechanics": {
                                    "children": {
                                        "Kinematics": {"children": {}},
                                        "Dynamics": {"children": {}},
                                        "Oscillations": {"children": {}}
                                    }
                                },
                                "Quantum Physics": {
                                    "children": {
                                        "Quantum Fields": {"children": {}},
                                        "Entanglement": {"children": {}},
                                        "Quantum Measurement": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Chemistry": {
                            "children": {
                                "Organic Chemistry": {
                                    "children": {
                                        "Hydrocarbons": {"children": {}},
                                        "Reactions": {"children": {}},
                                        "Synthesis": {"children": {}}
                                    }
                                },
                                "Inorganic Chemistry": {
                                    "children": {
                                        "Transition Metals": {"children": {}},
                                        "Crystal Structures": {"children": {}},
                                        "Acid-Base Theory": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Life Sciences": {
                    "children": {
                        "Biology": {
                            "children": {
                                "Genetics": {
                                    "children": {
                                        "DNA Structure": {"children": {}},
                                        "Inheritance Patterns": {"children": {}},
                                        "Genetic Engineering": {"children": {}}
                                    }
                                },
                                "Ecology": {
                                    "children": {
                                        "Ecosystems": {"children": {}},
                                        "Biodiversity": {"children": {}},
                                        "Conservation": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Neuroscience": {
                            "children": {
                                "Brain Function": {
                                    "children": {
                                        "Cognition": {"children": {}},
                                        "Emotion": {"children": {}},
                                        "Learning and Memory": {"children": {}}
                                    }
                                },
                                "Neural Networks": {
                                    "children": {
                                        "Synapses": {"children": {}},
                                        "Neuroplasticity": {"children": {}},
                                        "Neuroimaging": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Applied Sciences": {
                    "children": {
                        "Engineering": {
                            "children": {
                                "Mechanical": {
                                    "children": {
                                        "Thermodynamics": {"children": {}},
                                        "Fluid Mechanics": {"children": {}},
                                        "Robotics": {"children": {}}
                                    }
                                },
                                "Electrical": {
                                    "children": {
                                        "Circuits": {"children": {}},
                                        "Signal Processing": {"children": {}},
                                        "Power Systems": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Computer Science": {
                            "children": {
                                "Algorithms": {
                                    "children": {
                                        "Sorting": {"children": {}},
                                        "Graphs": {"children": {}},
                                        "Dynamic Programming": {"children": {}}
                                    }
                                },
                                "Artificial Intelligence": {
                                    "children": {
                                        "Machine Learning": {"children": {}},
                                        "Natural Language": {"children": {}},
                                        "Computer Vision": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data

# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Science and Engineering_1.csv

📄 生成的数据样例：
              parent                child  value
0               Root    Physical Sciences  31.65
1  Physical Sciences              Physics  19.30
2            Physics      Quantum Physics  13.30
3            Physics            Mechanics   6.00
4  Physical Sciences            Chemistry  12.35
5          Chemistry  Inorganic Chemistry   5.30
6          Chemistry    Organic Chemistry   7.05
7               Root        Life Sciences  31.54
8      Life Sciences         Neuroscience   5.13
9       Neuroscience      Neural Networks   5.13
✅ 数据已保存至 ./csv/sunburst\sunburst_Science and Engineering_2.csv

📄 生成的数据样例：
                parent                child  value
0                 Root     Applied Sciences  23.73
1     Applied Sciences          Engineering  15.87
2          Engineering           Electrical   6.45
3          Engineering           Mechanical   9.42
4     Applied Sciences     Computer Science   7.86
5    

In [21]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(38)

# Food Industry主题
theme_name = "Food Industry Distribution by Category (2023)"
big_theme_name = "Food and Beverage Industry"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Food Production": {
                    "children": {
                        "Agriculture": {
                            "children": {
                                "Crop Farming": {
                                    "children": {
                                        "Grains": {"children": {}},
                                        "Vegetables": {"children": {}},
                                        "Fruits": {"children": {}}
                                    }
                                },
                                "Livestock": {
                                    "children": {
                                        "Cattle": {"children": {}},
                                        "Poultry": {"children": {}},
                                        "Dairy Farming": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Aquaculture": {
                            "children": {
                                "Freshwater Farming": {
                                    "children": {
                                        "Tilapia": {"children": {}},
                                        "Catfish": {"children": {}}
                                    }
                                },
                                "Marine Farming": {
                                    "children": {
                                        "Salmon": {"children": {}},
                                        "Seaweed": {"children": {}},
                                        "Shellfish": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Food Processing": {
                    "children": {
                        "Meat Processing": {
                            "children": {
                                "Slaughtering": {
                                    "children": {
                                        "Beef": {"children": {}},
                                        "Pork": {"children": {}}
                                    }
                                },
                                "Packaging": {
                                    "children": {
                                        "Vacuum Sealed": {"children": {}},
                                        "Canned Meats": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Dairy Processing": {
                            "children": {
                                "Milk Products": {
                                    "children": {
                                        "Cheese": {"children": {}},
                                        "Butter": {"children": {}},
                                        "Yogurt": {"children": {}}
                                    }
                                },
                                "Ice Cream": {
                                    "children": {
                                        "Frozen Yogurt": {"children": {}},
                                        "Gelato": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Food Distribution": {
                    "children": {
                        "Wholesale": {
                            "children": {
                                "Regional Hubs": {
                                    "children": {
                                        "Cold Chain": {"children": {}},
                                        "Dry Goods": {"children": {}}
                                    }
                                },
                                "Export Networks": {
                                    "children": {
                                        "Refrigerated Shipping": {"children": {}},
                                        "Air Freight": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Retail": {
                            "children": {
                                "Supermarkets": {
                                    "children": {
                                        "Fresh Produce": {"children": {}},
                                        "Meat Counters": {"children": {}}
                                    }
                                },
                                "Convenience Stores": {
                                    "children": {
                                        "Packaged Snacks": {"children": {}},
                                        "Ready Meals": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}



def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False


GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(1000)

✅ 数据已保存至 ./csv/sunburst\sunburst_Food and Beverage Industry_1.csv

📄 生成的数据样例：
               parent               child  value
0                Root     Food Production  31.69
1     Food Production         Agriculture  12.25
2         Agriculture           Livestock   6.56
3         Agriculture        Crop Farming   5.69
4     Food Production         Aquaculture  19.44
5         Aquaculture  Freshwater Farming  12.20
6  Freshwater Farming             Tilapia   5.46
7  Freshwater Farming             Catfish   6.74
8         Aquaculture      Marine Farming   7.24
9                Root     Food Processing  46.88
✅ 数据已保存至 ./csv/sunburst\sunburst_Food and Beverage Industry_2.csv

📄 生成的数据样例：
              parent               child  value
0               Root   Food Distribution  21.46
1  Food Distribution              Retail  10.30
2             Retail  Convenience Stores   5.29
3             Retail        Supermarkets   5.01
4  Food Distribution           Wholesale  11.16
5          Wholes

In [22]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(62)

# Education主题
theme_name = "Education Distribution by Category (2024)"
big_theme_name = "Education and Academics"


# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Formal Education": {
                    "children": {
                        "Primary Education": {
                            "children": {
                                "Public Primary": {
                                    "children": {
                                        "Urban Schools": {"children": {}},
                                        "Rural Schools": {"children": {}},
                                        "Special Needs Programs": {"children": {}}
                                    }
                                },
                                "Private Primary": {
                                    "children": {
                                        "Religious Schools": {"children": {}},
                                        "Montessori": {"children": {}},
                                        "International Curriculum": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Secondary Education": {
                            "children": {
                                "High Schools": {
                                    "children": {
                                        "STEM Tracks": {"children": {}},
                                        "Humanities Tracks": {"children": {}},
                                        "Arts Focus": {"children": {}}
                                    }
                                },
                                "Vocational Schools": {
                                    "children": {
                                        "Technical Training": {"children": {}},
                                        "Apprenticeship Programs": {"children": {}},
                                        "Career Prep Courses": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Higher Education": {
                    "children": {
                        "Undergraduate": {
                            "children": {
                                "Liberal Arts Colleges": {
                                    "children": {
                                        "BA Programs": {"children": {}},
                                        "BSc Programs": {"children": {}},
                                        "Dual Majors": {"children": {}}
                                    }
                                },
                                "Research Universities": {
                                    "children": {
                                        "Engineering Schools": {"children": {}},
                                        "Medical Schools": {"children": {}},
                                        "Business Schools": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Postgraduate": {
                            "children": {
                                "Masters Programs": {
                                    "children": {
                                        "MBA": {"children": {}},
                                        "MA": {"children": {}},
                                        "MSc": {"children": {}}
                                    }
                                },
                                "Doctoral Programs": {
                                    "children": {
                                        "PhD in Science": {"children": {}},
                                        "PhD in Education": {"children": {}},
                                        "PhD in Humanities": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Informal Learning": {
                    "children": {
                        "Online Education": {
                            "children": {
                                "MOOCs": {
                                    "children": {
                                        "Coursera": {"children": {}},
                                        "edX": {"children": {}},
                                        "FutureLearn": {"children": {}}
                                    }
                                },
                                "Skill Platforms": {
                                    "children": {
                                        "Udemy": {"children": {}},
                                        "Skillshare": {"children": {}},
                                        "LinkedIn Learning": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Community Learning": {
                            "children": {
                                "Public Libraries": {
                                    "children": {
                                        "Workshops": {"children": {}},
                                        "Book Clubs": {"children": {}},
                                        "Digital Literacy": {"children": {}}
                                    }
                                },
                                "Non-Profit Programs": {
                                    "children": {
                                        "Adult Literacy": {"children": {}},
                                        "Youth Mentoring": {"children": {}},
                                        "Refugee Education": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False


GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data

# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Education and Academics_1.csv

📄 生成的数据样例：
              parent                  child  value
0               Root       Higher Education  20.68
1   Higher Education          Undergraduate  13.95
2      Undergraduate  Research Universities   5.20
3      Undergraduate  Liberal Arts Colleges   8.75
4   Higher Education           Postgraduate   6.73
5               Root       Formal Education  36.93
6   Formal Education      Primary Education  13.85
7  Primary Education         Public Primary   7.87
8  Primary Education        Private Primary   5.98
9   Formal Education    Secondary Education  23.08
✅ 数据已保存至 ./csv/sunburst\sunburst_Education and Academics_2.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root       Higher Education  27.71
1       Higher Education          Undergraduate  22.64
2          Undergraduate  Research Universities  15.93
3  Research Universities    Engineering Schools   5.08
4  Research Un

In [23]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(27)

# Sports主题
theme_name = "Sports Distribution by Category (2024)"
big_theme_name = "Sports and Entertainment"



# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Competitive Sports": {
                    "children": {
                        "Team Sports": {
                            "children": {
                                "Football": {
                                    "children": {
                                        "World Cup": {"children": {}},
                                        "League Matches": {"children": {}},
                                        "Youth Championships": {"children": {}}
                                    }
                                },
                                "Basketball": {
                                    "children": {
                                        "NBA": {"children": {}},
                                        "FIBA Events": {"children": {}},
                                        "3x3 Tournaments": {"children": {}}
                                    }
                                },
                                "Volleyball": {
                                    "children": {
                                        "World League": {"children": {}},
                                        "Beach Volleyball": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Individual Sports": {
                            "children": {
                                "Tennis": {
                                    "children": {
                                        "Grand Slams": {"children": {}},
                                        "ATP Tour": {"children": {}},
                                        "WTA Tour": {"children": {}}
                                    }
                                },
                                "Athletics": {
                                    "children": {
                                        "Marathon": {"children": {}},
                                        "Track Events": {"children": {}},
                                        "Field Events": {"children": {}}
                                    }
                                },
                                "Combat Sports": {
                                    "children": {
                                        "Boxing": {"children": {}},
                                        "Wrestling": {"children": {}},
                                        "MMA": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Fitness and Wellness": {
                    "children": {
                        "Personal Training": {
                            "children": {
                                "Gym Workouts": {
                                    "children": {
                                        "Strength Training": {"children": {}},
                                        "Cardio Sessions": {"children": {}},
                                        "Flexibility Training": {"children": {}}
                                    }
                                },
                                "Yoga and Meditation": {
                                    "children": {
                                        "Hatha Yoga": {"children": {}},
                                        "Vinyasa Flow": {"children": {}},
                                        "Mindfulness Sessions": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Outdoor Activities": {
                            "children": {
                                "Running Clubs": {
                                    "children": {
                                        "Trail Runs": {"children": {}},
                                        "City Runs": {"children": {}}
                                    }
                                },
                                "Cycling Groups": {
                                    "children": {
                                        "Road Biking": {"children": {}},
                                        "Mountain Biking": {"children": {}},
                                        "Virtual Races": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Sports Media and Tech": {
                    "children": {
                        "Live Broadcasting": {
                            "children": {
                                "Television Coverage": {
                                    "children": {
                                        "Olympic Coverage": {"children": {}},
                                        "National Leagues": {"children": {}}
                                    }
                                },
                                "Online Streaming": {
                                    "children": {
                                        "YouTube Sports": {"children": {}},
                                        "Twitch Coverage": {"children": {}},
                                        "OTT Platforms": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Sports Technology": {
                            "children": {
                                "Wearables": {
                                    "children": {
                                        "Heart Rate Monitors": {"children": {}},
                                        "Fitness Trackers": {"children": {}}
                                    }
                                },
                                "Analytics Tools": {
                                    "children": {
                                        "Performance Analysis": {"children": {}},
                                        "Tactical Modeling": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Sports and Entertainment_1.csv

📄 生成的数据样例：
                 parent                 child  value
0                  Root    Competitive Sports  30.57
1    Competitive Sports     Individual Sports  10.65
2    Competitive Sports           Team Sports  19.92
3           Team Sports            Volleyball   5.61
4           Team Sports              Football   5.59
5           Team Sports            Basketball   8.71
6                  Root  Fitness and Wellness  47.52
7  Fitness and Wellness     Personal Training  42.43
8     Personal Training          Gym Workouts  12.63
9     Personal Training   Yoga and Meditation  29.80
✅ 数据已保存至 ./csv/sunburst\sunburst_Sports and Entertainment_2.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root     Competitive Sports  20.34
1     Competitive Sports            Team Sports  14.16
2     Competitive Sports      Individual Sports   6.18
3                   Root  Sports Media and Te

In [24]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(36)

# Employee主题
theme_name = "Employee Distribution by Category (2021)"
big_theme_name = "Human Resources and Employee Management "



# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Employee Lifecycle": {
                    "children": {
                        "Recruitment": {
                            "children": {
                                "Job Posting": {
                                    "children": {
                                        "Internal Boards": {"children": {}},
                                        "External Platforms": {"children": {}},
                                        "Referral Programs": {"children": {}}
                                    }
                                },
                                "Candidate Evaluation": {
                                    "children": {
                                        "Resume Screening": {"children": {}},
                                        "Interview Rounds": {"children": {}},
                                        "Assessment Tests": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Onboarding": {
                            "children": {
                                "Orientation": {
                                    "children": {
                                        "Welcome Sessions": {"children": {}},
                                        "Policy Training": {"children": {}},
                                        "Facility Tours": {"children": {}}
                                    }
                                },
                                "Mentorship": {
                                    "children": {
                                        "Buddy System": {"children": {}},
                                        "Learning Modules": {"children": {}},
                                        "Shadowing Programs": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Employee Development": {
                    "children": {
                        "Training Programs": {
                            "children": {
                                "Skill Workshops": {
                                    "children": {
                                        "Technical Skills": {"children": {}},
                                        "Soft Skills": {"children": {}},
                                        "Leadership": {"children": {}}
                                    }
                                },
                                "Online Learning": {
                                    "children": {
                                        "LMS Platforms": {"children": {}},
                                        "Self-paced Modules": {"children": {}},
                                        "Certifications": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Performance Management": {
                            "children": {
                                "Goal Setting": {
                                    "children": {
                                        "OKRs": {"children": {}},
                                        "SMART Goals": {"children": {}},
                                        "Team Objectives": {"children": {}}
                                    }
                                },
                                "Feedback": {
                                    "children": {
                                        "360 Review": {"children": {}},
                                        "Peer Review": {"children": {}},
                                        "Manager Review": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Workplace Wellbeing": {
                    "children": {
                        "Health & Safety": {
                            "children": {
                                "Ergonomics": {
                                    "children": {
                                        "Workstation Setup": {"children": {}},
                                        "Posture Training": {"children": {}},
                                        "Eye Care": {"children": {}}
                                    }
                                },
                                "Emergency Response": {
                                    "children": {
                                        "Evacuation Drills": {"children": {}},
                                        "First Aid Training": {"children": {}},
                                        "Fire Safety": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Work-Life Balance": {
                            "children": {
                                "Flexible Scheduling": {
                                    "children": {
                                        "Remote Work": {"children": {}},
                                        "Hybrid Options": {"children": {}},
                                        "Compressed Weeks": {"children": {}}
                                    }
                                },
                                "Employee Assistance": {
                                    "children": {
                                        "Counseling Services": {"children": {}},
                                        "Financial Wellness": {"children": {}},
                                        "Mental Health Days": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(1000)

✅ 数据已保存至 ./csv/sunburst\sunburst_Human Resources and Employee Management _1.csv

📄 生成的数据样例：
                parent                child  value
0                 Root  Workplace Wellbeing  36.70
1  Workplace Wellbeing      Health & Safety  31.69
2      Health & Safety   Emergency Response   8.87
3      Health & Safety           Ergonomics  22.82
4           Ergonomics    Workstation Setup   6.80
5           Ergonomics     Posture Training   6.98
6           Ergonomics             Eye Care   9.04
7  Workplace Wellbeing    Work-Life Balance   5.01
8                 Root   Employee Lifecycle  33.07
9   Employee Lifecycle          Recruitment  22.00
✅ 数据已保存至 ./csv/sunburst\sunburst_Human Resources and Employee Management _2.csv

📄 生成的数据样例：
                parent                child  value
0                 Root  Workplace Wellbeing  39.08
1  Workplace Wellbeing    Work-Life Balance  31.36
2    Work-Life Balance  Employee Assistance  19.87
3  Employee Assistance   Mental Health Days   5.32


In [25]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(42)

# E-commerce主题
theme_name = "E-commerce Distribution by Category (2024)"
big_theme_name = "Retail and E-commerce"



# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Marketplace Platforms": {
                    "children": {
                        "B2C Platforms": {
                            "children": {
                                "Fashion Retailers": {
                                    "children": {
                                        "Fast Fashion": {"children": {}},
                                        "Luxury Brands": {"children": {}},
                                        "Ethical Apparel": {"children": {}}
                                    }
                                },
                                "Electronics Retailers": {
                                    "children": {
                                        "Smartphones": {"children": {}},
                                        "Laptops": {"children": {}},
                                        "Wearables": {"children": {}}
                                    }
                                }
                            }
                        },
                        "C2C Marketplaces": {
                            "children": {
                                "Handmade Goods": {
                                    "children": {
                                        "Craft Supplies": {"children": {}},
                                        "Custom Gifts": {"children": {}},
                                        "Upcycled Products": {"children": {}}
                                    }
                                },
                                "Used Goods": {
                                    "children": {
                                        "Secondhand Fashion": {"children": {}},
                                        "Electronics Trade": {"children": {}},
                                        "Home Furnishings": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Payment & Logistics": {
                    "children": {
                        "Digital Payment Systems": {
                            "children": {
                                "E-Wallets": {
                                    "children": {
                                        "Mobile Wallets": {"children": {}},
                                        "Crypto Payments": {"children": {}},
                                        "QR Code Systems": {"children": {}}
                                    }
                                },
                                "Buy Now Pay Later": {
                                    "children": {
                                        "Installment Apps": {"children": {}},
                                        "Credit Scoring": {"children": {}},
                                        "Deferred Billing": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Fulfillment & Delivery": {
                            "children": {
                                "Warehousing": {
                                    "children": {
                                        "Cold Chain": {"children": {}},
                                        "3PL Providers": {"children": {}},
                                        "Same-day Storage": {"children": {}}
                                    }
                                },
                                "Last-mile Delivery": {
                                    "children": {
                                        "Bike Couriers": {"children": {}},
                                        "Drone Delivery": {"children": {}},
                                        "Smart Lockers": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Customer Engagement": {
                    "children": {
                        "Marketing Strategies": {
                            "children": {
                                "Email Campaigns": {
                                    "children": {
                                        "Promotions": {"children": {}},
                                        "Abandoned Cart": {"children": {}},
                                        "Loyalty Rewards": {"children": {}}
                                    }
                                },
                                "Social Media Ads": {
                                    "children": {
                                        "Influencer Marketing": {"children": {}},
                                        "Short Videos": {"children": {}},
                                        "Interactive Polls": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Customer Service": {
                            "children": {
                                "Chatbots": {
                                    "children": {
                                        "AI Agents": {"children": {}},
                                        "FAQ Automation": {"children": {}},
                                        "Multilingual Support": {"children": {}}
                                    }
                                },
                                "Live Support": {
                                    "children": {
                                        "Phone Help": {"children": {}},
                                        "Video Assistance": {"children": {}},
                                        "In-app Messaging": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Retail and E-commerce_1.csv

📄 生成的数据样例：
                    parent                    child  value
0                     Root      Payment & Logistics  21.69
1      Payment & Logistics   Fulfillment & Delivery   8.93
2      Payment & Logistics  Digital Payment Systems  12.76
3  Digital Payment Systems                E-Wallets   5.68
4  Digital Payment Systems        Buy Now Pay Later   7.08
5                     Root    Marketplace Platforms  41.46
6    Marketplace Platforms            B2C Platforms  11.88
7            B2C Platforms    Electronics Retailers   6.68
8            B2C Platforms        Fashion Retailers   5.20
9    Marketplace Platforms         C2C Marketplaces  29.58
✅ 数据已保存至 ./csv/sunburst\sunburst_Retail and E-commerce_2.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root    Customer Engagement  38.35
1    Customer Engagement       Customer Service  28.63
2       Customer Service           Live 

In [26]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(56)

# Healthcare主题
theme_name = "Healthcare Distribution by Category (2022)"
big_theme_name = "Healthcare and Health"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Public Health Systems": {
                    "children": {
                        "Government Programs": {
                            "children": {
                                "Vaccination Initiatives": {
                                    "children": {
                                        "Child Immunization": {"children": {}},
                                        "Adult Booster Shots": {"children": {}},
                                        "Pandemic Campaigns": {"children": {}}
                                    }
                                },
                                "Maternal Health": {
                                    "children": {
                                        "Prenatal Services": {"children": {}},
                                        "Postnatal Care": {"children": {}},
                                        "Nutrition for Mothers": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Health Surveillance": {
                            "children": {
                                "Disease Registries": {
                                    "children": {
                                        "Cancer Registries": {"children": {}},
                                        "Chronic Illness Tracking": {"children": {}},
                                        "Rare Disease Logs": {"children": {}}
                                    }
                                },
                                "Outbreak Monitoring": {
                                    "children": {
                                        "Real-time Alerts": {"children": {}},
                                        "Zoonotic Surveillance": {"children": {}},
                                        "Environmental Sensors": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Medical Services": {
                    "children": {
                        "Primary Care": {
                            "children": {
                                "Family Physicians": {
                                    "children": {
                                        "Routine Checkups": {"children": {}},
                                        "Health Screenings": {"children": {}},
                                        "Referral Management": {"children": {}}
                                    }
                                },
                                "Walk-in Clinics": {
                                    "children": {
                                        "Minor Injuries": {"children": {}},
                                        "Non-Emergency Cases": {"children": {}},
                                        "Prescription Refills": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Specialized Care": {
                            "children": {
                                "Cardiology": {
                                    "children": {
                                        "Heart Disease": {"children": {}},
                                        "Blood Pressure": {"children": {}},
                                        "Arrhythmia Clinics": {"children": {}}
                                    }
                                },
                                "Oncology": {
                                    "children": {
                                        "Chemotherapy Units": {"children": {}},
                                        "Radiation Therapy": {"children": {}},
                                        "Cancer Counseling": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Health Technologies": {
                    "children": {
                        "Medical Devices": {
                            "children": {
                                "Diagnostic Equipment": {
                                    "children": {
                                        "MRI Machines": {"children": {}},
                                        "Ultrasound": {"children": {}},
                                        "X-ray Systems": {"children": {}}
                                    }
                                },
                                "Monitoring Wearables": {
                                    "children": {
                                        "Heart Rate Trackers": {"children": {}},
                                        "Blood Glucose Sensors": {"children": {}},
                                        "Sleep Monitors": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Health Informatics": {
                            "children": {
                                "Electronic Records": {
                                    "children": {
                                        "EMR Systems": {"children": {}},
                                        "Patient Portals": {"children": {}},
                                        "Data Privacy Tools": {"children": {}}
                                    }
                                },
                                "Decision Support": {
                                    "children": {
                                        "Clinical AI": {"children": {}},
                                        "Drug Interaction Check": {"children": {}},
                                        "Predictive Modeling": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}


def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Healthcare and Health_1.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root       Medical Services  29.03
1       Medical Services           Primary Care  22.61
2           Primary Care        Walk-in Clinics  12.35
3           Primary Care      Family Physicians  10.26
4       Medical Services       Specialized Care   6.42
5                   Root  Public Health Systems  30.43
6  Public Health Systems    Health Surveillance  24.21
7    Health Surveillance    Outbreak Monitoring  13.98
8    Health Surveillance     Disease Registries  10.23
9  Public Health Systems    Government Programs   6.22
✅ 数据已保存至 ./csv/sunburst\sunburst_Healthcare and Health_2.csv

📄 生成的数据样例：
                  parent                    child  value
0                   Root    Public Health Systems  38.21
1  Public Health Systems      Government Programs  25.11
2    Government Programs          Maternal Health  13.80
3    Government Progr

In [27]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(32)

# Housing Market主题
theme_name = "Housing Market Distribution by Category (2023)"
big_theme_name = "Real Estate and Housing Market"


# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Residential Property": {
                    "children": {
                        "Single Family Homes": {
                            "children": {
                                "Detached": {
                                    "children": {
                                        "Urban Detached": {"children": {}},
                                        "Suburban Detached": {"children": {}},
                                        "Luxury Detached": {"children": {}}
                                    }
                                },
                                "Semi-Detached": {
                                    "children": {
                                        "Shared Wall Homes": {"children": {}},
                                        "Corner Units": {"children": {}},
                                        "Attached Garages": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Multi-Family Homes": {
                            "children": {
                                "Duplex": {
                                    "children": {
                                        "Owner-Occupied": {"children": {}},
                                        "Fully Rented": {"children": {}},
                                        "Basement Units": {"children": {}}
                                    }
                                },
                                "Apartments": {
                                    "children": {
                                        "Low-Rise": {"children": {}},
                                        "Mid-Rise": {"children": {}},
                                        "High-Rise": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Commercial Real Estate": {
                    "children": {
                        "Office Buildings": {
                            "children": {
                                "Downtown Offices": {
                                    "children": {
                                        "Class A": {"children": {}},
                                        "Class B": {"children": {}},
                                        "Shared Workspace": {"children": {}}
                                    }
                                },
                                "Suburban Offices": {
                                    "children": {
                                        "Corporate Campuses": {"children": {}},
                                        "Tech Parks": {"children": {}},
                                        "Converted Warehouses": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Retail Spaces": {
                            "children": {
                                "Shopping Centers": {
                                    "children": {
                                        "Malls": {"children": {}},
                                        "Strip Malls": {"children": {}},
                                        "Outlet Centers": {"children": {}}
                                    }
                                },
                                "Street-Level Shops": {
                                    "children": {
                                        "Historic Districts": {"children": {}},
                                        "High Foot Traffic": {"children": {}},
                                        "Luxury Boutiques": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Rental Market": {
                    "children": {
                        "Long-Term Rentals": {
                            "children": {
                                "Apartments": {
                                    "children": {
                                        "Studio": {"children": {}},
                                        "1-Bedroom": {"children": {}},
                                        "2-Bedroom": {"children": {}}
                                    }
                                },
                                "Houses for Rent": {
                                    "children": {
                                        "Townhomes": {"children": {}},
                                        "Detached Rentals": {"children": {}},
                                        "Gated Communities": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Short-Term Rentals": {
                            "children": {
                                "Vacation Homes": {
                                    "children": {
                                        "Beachfront": {"children": {}},
                                        "Mountain Cabins": {"children": {}},
                                        "City Condos": {"children": {}}
                                    }
                                },
                                "Platforms": {
                                    "children": {
                                        "Airbnb Listings": {"children": {}},
                                        "Booking.com": {"children": {}},
                                        "Vrbo Hosts": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}


def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False




GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(1000)

✅ 数据已保存至 ./csv/sunburst\sunburst_Real Estate and Housing Market_1.csv

📄 生成的数据样例：
                   parent                   child  value
0                    Root  Commercial Real Estate  23.84
1  Commercial Real Estate           Retail Spaces  17.78
2           Retail Spaces        Shopping Centers  10.56
3           Retail Spaces      Street-Level Shops   7.22
4  Commercial Real Estate        Office Buildings   6.06
5                    Root           Rental Market  49.48
6           Rental Market      Short-Term Rentals  17.22
7      Short-Term Rentals               Platforms   7.68
8      Short-Term Rentals          Vacation Homes   9.54
9           Rental Market       Long-Term Rentals  32.26
✅ 数据已保存至 ./csv/sunburst\sunburst_Real Estate and Housing Market_2.csv

📄 生成的数据样例：
                   parent                   child  value
0                    Root           Rental Market  21.03
1           Rental Market      Short-Term Rentals  15.55
2      Short-Term Rentals          Vac

In [28]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(22)

# Business主题
theme_name = "Business Distribution by Category (2021)"
big_theme_name = "Business and Finance"


# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Corporate Strategy": {
                    "children": {
                        "Growth Models": {
                            "children": {
                                "Mergers & Acquisitions": {
                                    "children": {
                                        "Horizontal Mergers": {"children": {}},
                                        "Vertical Integration": {"children": {}},
                                        "Conglomerate Deals": {"children": {}}
                                    }
                                },
                                "Market Expansion": {
                                    "children": {
                                        "Domestic Growth": {"children": {}},
                                        "Internationalization": {"children": {}},
                                        "Franchising": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Innovation Strategy": {
                            "children": {
                                "Product Innovation": {
                                    "children": {
                                        "R&D Investment": {"children": {}},
                                        "User-Centric Design": {"children": {}},
                                        "Rapid Prototyping": {"children": {}}
                                    }
                                },
                                "Process Innovation": {
                                    "children": {
                                        "Automation Systems": {"children": {}},
                                        "Lean Operations": {"children": {}},
                                        "Agile Transformation": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Financial Management": {
                    "children": {
                        "Budgeting & Planning": {
                            "children": {
                                "Capital Budgeting": {
                                    "children": {
                                        "NPV Analysis": {"children": {}},
                                        "IRR Evaluation": {"children": {}},
                                        "Payback Method": {"children": {}}
                                    }
                                },
                                "Operational Forecasting": {
                                    "children": {
                                        "Sales Forecasts": {"children": {}},
                                        "Cost Projections": {"children": {}},
                                        "Scenario Planning": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Investment Strategy": {
                            "children": {
                                "Asset Allocation": {
                                    "children": {
                                        "Equities": {"children": {}},
                                        "Bonds": {"children": {}},
                                        "Real Estate": {"children": {}}
                                    }
                                },
                                "Risk Management": {
                                    "children": {
                                        "Diversification": {"children": {}},
                                        "Hedging Instruments": {"children": {}},
                                        "Insurance Strategies": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Digital Transformation": {
                    "children": {
                        "Technology Adoption": {
                            "children": {
                                "Cloud Computing": {
                                    "children": {
                                        "Public Cloud": {"children": {}},
                                        "Hybrid Solutions": {"children": {}},
                                        "Serverless Systems": {"children": {}}
                                    }
                                },
                                "AI Integration": {
                                    "children": {
                                        "Predictive Analytics": {"children": {}},
                                        "Chatbots": {"children": {}},
                                        "AI-Powered Insights": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Data Strategy": {
                            "children": {
                                "Data Governance": {
                                    "children": {
                                        "Data Quality": {"children": {}},
                                        "Compliance (GDPR)": {"children": {}},
                                        "Master Data Management": {"children": {}}
                                    }
                                },
                                "Business Intelligence": {
                                    "children": {
                                        "Dashboards": {"children": {}},
                                        "Data Warehousing": {"children": {}},
                                        "KPI Tracking": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}




def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False



GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Business and Finance_1.csv

📄 生成的数据样例：
                   parent                    child  value
0                    Root     Financial Management  20.14
1    Financial Management      Investment Strategy   8.95
2    Financial Management     Budgeting & Planning  11.19
3    Budgeting & Planning  Operational Forecasting   6.02
4    Budgeting & Planning        Capital Budgeting   5.17
5                    Root   Digital Transformation  58.67
6  Digital Transformation            Data Strategy  52.48
7           Data Strategy          Data Governance  32.11
8         Data Governance        Compliance (GDPR)  13.39
9         Data Governance   Master Data Management  11.63
✅ 数据已保存至 ./csv/sunburst\sunburst_Business and Finance_2.csv

📄 生成的数据样例：
                 parent                   child  value
0                  Root      Corporate Strategy  20.60
1    Corporate Strategy           Growth Models  15.20
2         Growth Models        Market Expansion   8.0

In [29]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(24)

# Transportation主题
theme_name = "Transportation Distribution by Category (2021)"
big_theme_name = "Transportation and Logistics"

# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Urban Mobility": {
                    "children": {
                        "Public Transit": {
                            "children": {
                                "Bus Systems": {
                                    "children": {
                                        "City Buses": {"children": {}},
                                        "Shuttle Services": {"children": {}},
                                        "Bus Rapid Transit": {"children": {}}
                                    }
                                },
                                "Subway Networks": {
                                    "children": {
                                        "Underground Metro": {"children": {}},
                                        "Light Rail Transit": {"children": {}},
                                        "Monorail Systems": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Micromobility": {
                            "children": {
                                "Bike Sharing": {
                                    "children": {
                                        "Station-Based Bikes": {"children": {}},
                                        "Dockless Bikes": {"children": {}},
                                        "Electric Bicycles": {"children": {}}
                                    }
                                },
                                "E-Scooters": {
                                    "children": {
                                        "Rental Platforms": {"children": {}},
                                        "Safety Standards": {"children": {}},
                                        "Parking Zones": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Freight and Logistics": {
                    "children": {
                        "Road Freight": {
                            "children": {
                                "Trucking Fleets": {
                                    "children": {
                                        "Long-Haul Trucks": {"children": {}},
                                        "Local Delivery Vans": {"children": {}},
                                        "Refrigerated Trucks": {"children": {}}
                                    }
                                },
                                "Highway Networks": {
                                    "children": {
                                        "Expressways": {"children": {}},
                                        "Toll Roads": {"children": {}},
                                        "Logistics Parks": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Rail Freight": {
                            "children": {
                                "Cargo Trains": {
                                    "children": {
                                        "Bulk Transport": {"children": {}},
                                        "Container Rail": {"children": {}},
                                        "Auto Transport": {"children": {}}
                                    }
                                },
                                "Intermodal Hubs": {
                                    "children": {
                                        "Dry Ports": {"children": {}},
                                        "Transfer Yards": {"children": {}},
                                        "Rail Terminals": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Air Transport": {
                    "children": {
                        "Commercial Airlines": {
                            "children": {
                                "Passenger Flights": {
                                    "children": {
                                        "Domestic Airlines": {"children": {}},
                                        "International Routes": {"children": {}},
                                        "Low-Cost Carriers": {"children": {}}
                                    }
                                },
                                "Airport Infrastructure": {
                                    "children": {
                                        "Runways": {"children": {}},
                                        "Passenger Terminals": {"children": {}},
                                        "Air Traffic Control": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Air Cargo": {
                            "children": {
                                "Cargo Airlines": {
                                    "children": {
                                        "Parcel Freight": {"children": {}},
                                        "Perishable Goods": {"children": {}},
                                        "Hazardous Materials": {"children": {}}
                                    }
                                },
                                "Logistics Platforms": {
                                    "children": {
                                        "Cargo Terminals": {"children": {}},
                                        "Customs Clearance": {"children": {}},
                                        "Aviation Warehousing": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}



def is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
    """
    检查是否近似均分或重复比例过多。
    - tolerance: 每个比例与均值的相对偏差阈值（如 1.5%）
    - max_duplicates: 允许的重复比例个数
    """
    n = len(ratios)
    mean_r = 1.0 / n
    # 1. 近似均分检测（所有与均值偏差都很小）
    if all(abs(r - mean_r) / mean_r < tolerance for r in ratios):
        return True

    # 2. 重复值检测（近似值个数超过上限）
    rounded = [round(r, 3) for r in ratios]
    counts = defaultdict(int)
    for r in rounded:
        counts[r] += 1
    if max(counts.values()) > max_duplicates:
        return True

    return False





GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        if is_near_uniform(ratios, tolerance=0.015, max_duplicates=3):
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Transportation and Logistics_1.csv

📄 生成的数据样例：
                  parent                  child  value
0                   Root         Urban Mobility  28.72
1         Urban Mobility          Micromobility   7.61
2         Urban Mobility         Public Transit  21.11
3         Public Transit            Bus Systems  10.22
4         Public Transit        Subway Networks  10.89
5                   Root  Freight and Logistics  20.96
6  Freight and Logistics           Rail Freight   7.04
7  Freight and Logistics           Road Freight  13.92
8           Road Freight        Trucking Fleets   5.45
9           Road Freight       Highway Networks   8.47
✅ 数据已保存至 ./csv/sunburst\sunburst_Transportation and Logistics_2.csv

📄 生成的数据样例：
           parent              child  value
0            Root     Urban Mobility  49.16
1  Urban Mobility      Micromobility  19.70
2   Micromobility       Bike Sharing   7.31
3   Micromobility         E-Scooters  12.39
4  Urban Mobili

In [30]:
import random
import pandas as pd
import os
import csv
from collections import defaultdict
import numpy as np

random.seed(47)

# Tourism主题
theme_name = "Tourism Distribution by Category (2022)"
big_theme_name = "Tourism and Hospitality"
# 约束参数
MIN_RATIO = 0.05
MAX_CHILDREN = 20
MIN_LEVEL = 3
MAX_LEVEL = 5

# 新增全局值追踪系统
GLOBAL_VALUES = defaultdict(float)
GLOBAL_VALUES['Root'] = 100.0

# 动态结构模板
tree_structure = {
    "children": {
        "Root": {
            "children": {
                "Destination Types": {
                    "children": {
                        "Urban Tourism": {
                            "children": {
                                "City Landmarks": {
                                    "children": {
                                        "Museums": {"children": {}},
                                        "Historical Sites": {"children": {}},
                                        "Architectural Tours": {"children": {}}
                                    }
                                },
                                "Shopping Districts": {
                                    "children": {
                                        "Luxury Malls": {"children": {}},
                                        "Souvenir Streets": {"children": {}},
                                        "Night Markets": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Nature Tourism": {
                            "children": {
                                "Mountain Destinations": {
                                    "children": {
                                        "Hiking Trails": {"children": {}},
                                        "Campsites": {"children": {}},
                                        "Scenic Overlooks": {"children": {}}
                                    }
                                },
                                "Coastal Areas": {
                                    "children": {
                                        "Beach Resorts": {"children": {}},
                                        "Marine Parks": {"children": {}},
                                        "Surfing Zones": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Hospitality Services": {
                    "children": {
                        "Lodging Options": {
                            "children": {
                                "Luxury Hotels": {
                                    "children": {
                                        "Presidential Suites": {"children": {}},
                                        "Spa Services": {"children": {}},
                                        "Concierge Programs": {"children": {}}
                                    }
                                },
                                "Budget Hotels": {
                                    "children": {
                                        "Shared Rooms": {"children": {}},
                                        "Basic Amenities": {"children": {}},
                                        "Express Check-in": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Dining Experiences": {
                            "children": {
                                "Local Cuisine": {
                                    "children": {
                                        "Street Food": {"children": {}},
                                        "Traditional Restaurants": {"children": {}},
                                        "Cultural Dining Events": {"children": {}}
                                    }
                                },
                                "Fine Dining": {
                                    "children": {
                                        "Michelin-Star Restaurants": {"children": {}},
                                        "Tasting Menus": {"children": {}},
                                        "Wine Pairings": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                },
                "Travel Services": {
                    "children": {
                        "Transportation": {
                            "children": {
                                "Air Travel": {
                                    "children": {
                                        "Domestic Flights": {"children": {}},
                                        "International Routes": {"children": {}},
                                        "Airport Lounges": {"children": {}}
                                    }
                                },
                                "Rail & Road": {
                                    "children": {
                                        "High-Speed Rail": {"children": {}},
                                        "Tour Buses": {"children": {}},
                                        "Car Rentals": {"children": {}}
                                    }
                                }
                            }
                        },
                        "Guided Tours": {
                            "children": {
                                "Cultural Tours": {
                                    "children": {
                                        "City Walks": {"children": {}},
                                        "Historical Guides": {"children": {}},
                                        "Audio Tours": {"children": {}}
                                    }
                                },
                                "Adventure Tours": {
                                    "children": {
                                        "Ziplining": {"children": {}},
                                        "Rafting Trips": {"children": {}},
                                        "Desert Safaris": {"children": {}}
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}









GLOBAL_VALUE_RECORD = {}


def random_proportions(n, parent_global, force_min_ratio=None):
    """ 生成符合全局约束的本地比例，支持强制最小比例（如 Root 子节点 ≥ 0.2） """
    # 设置动态最小比例限制
    min_local = max(force_min_ratio if force_min_ratio else 0.05, 5.0 / parent_global)

    # 可行性验证
    if n * min_local > 1.0:
        raise ValueError(f"父节点{parent_global}%无法生成{n}个子节点")

    max_retries = 10  # 最大重试次数
    for _ in range(max_retries):
        remaining = 1.0 - n * min_local
        base = [random.expovariate(1.0) for _ in range(n)]
        total = sum(base)
        ratios = [min_local + (p / total) * remaining for p in base]

        # 如果设置了强制最小比例，则所有比例必须 ≥ force_min_ratio
        if force_min_ratio and any(r < force_min_ratio for r in ratios):
            continue  # 不满足比例要求，重新生成

        # 检查1：禁止完美均分模式（误差<0.1%）
        expected_ratio = 1.0 / n
        if sum(1 for r in ratios if abs(r - expected_ratio) / expected_ratio < 0.03) == n:
            if parent_global * expected_ratio >= 5.0:
                continue

        # 检查2：禁止出现超过3个相同比例
        rounded_ratios = [round(r, 3) for r in ratios]
        ratio_counts = defaultdict(int)
        for r in rounded_ratios:
            ratio_counts[r] += 1
        if max(ratio_counts.values()) > 3:
            continue

        return ratios

    # 最终保底方案：使用 Dirichlet 分布生成
    alpha = [0.5] * n
    dirichlet_ratios = list(np.random.dirichlet(alpha))
    return [min_local + r * (1 - n * min_local) for r in dirichlet_ratios]



# 新增：层级节点池，全局控制每层节点总数为 3-5
layer_node_pool = defaultdict(list)

def generate_proportional_data(node, parent_name="Root", current_level=1):
    data = []

    if current_level > MAX_LEVEL:
        return []

    if isinstance(node, dict) and "children" in node:
        children_all = list(node["children"].items())

        # 当前层还未填满
        if len(layer_node_pool[current_level]) < 3:
            needed = 3 - len(layer_node_pool[current_level])
        else:
            needed = max(0, 5 - len(layer_node_pool[current_level]))

        remaining_slots = 5 - len(layer_node_pool[current_level])
        if remaining_slots <= 0:
            return []

        random.shuffle(children_all)
        children = children_all[:remaining_slots]
        if not children:
            return []

        parent_global = GLOBAL_VALUES[parent_name]
        try:
            ratios = random_proportions(len(children), parent_global, force_min_ratio=0.2 if parent_name == "Root" else None)
        except ValueError:
            return []

        raw_child_globals = [parent_global * r for r in ratios]

        # ✅ 精确调整子节点之和为父节点值，确保值总和一致
        sum_raw = sum(raw_child_globals)
        correction = parent_global - sum_raw
        raw_child_globals[0] += correction  # 调整第一个子节点补差值
        raw_child_globals = [round(v, 2) for v in raw_child_globals]

        for (child_name, child_node), child_global in zip(children, raw_child_globals):
            if child_global < 5.0:
                continue

            GLOBAL_VALUES[child_name] = child_global
            layer_node_pool[current_level].append(child_name)
            data.append([parent_name, child_name, child_global])

            if current_level < MAX_LEVEL:
                data += generate_proportional_data(child_node, child_name, current_level + 1)

    return data


# 保存路径
output_folder = './csv/sunburst'
os.makedirs(output_folder, exist_ok=True)

# 自动生成文件名
def get_next_filename():
    prefix = f'sunburst_{big_theme_name}_'
    existing_files = [f for f in os.listdir(output_folder) if f.startswith(prefix) and f.endswith('.csv')]

    max_num = 0
    for f in existing_files:
        try:
            num_part = f.rsplit('_', 1)[-1].replace('.csv', '')
            num = int(num_part)
            max_num = max(max_num, num)
        except ValueError:
            continue

    return f'{prefix}{max_num + 1}.csv'


# 保存数据
# 修改后的保存函数
def save_tree_data_to_multiple_csv(num_files):
    for _ in range(num_files):
        GLOBAL_VALUES.clear()
        GLOBAL_VALUES['Root'] = 100.0
        
        layer_node_pool.clear()  # ✅ 必须清空层级池，否则后续轮次会失败
        tree_data = generate_proportional_data(tree_structure['children']['Root'])
        df = pd.DataFrame(tree_data, columns=["parent", "child", "value"])

        # 最终全局验证
        invalid = [k for k, v in GLOBAL_VALUES.items() if v < 5 and k != 'Root']
        if invalid:
            print(f"⚠️ 存在违规节点: {invalid}")
            continue

        csv_file = os.path.join(output_folder, get_next_filename())
        title = f'{big_theme_name},{theme_name},%'

        with open(csv_file, "w", newline='') as file:
            writer = csv.writer(file)
            writer.writerow([big_theme_name, theme_name, "%"])
            
            writer.writerow(["parent", "child", "value"])
            for row in df.itertuples(index=False, name=None):
                writer.writerow(row)

        print(f"✅ 数据已保存至 {csv_file}")
        print("\n📄 生成的数据样例：")
        print(df.head(10))

# 示例生成 5 份
save_tree_data_to_multiple_csv(500)

✅ 数据已保存至 ./csv/sunburst\sunburst_Tourism and Hospitality_1.csv

📄 生成的数据样例：
              parent              child  value
0               Root    Travel Services  34.17
1    Travel Services     Transportation  27.30
2     Transportation         Air Travel   9.40
3     Transportation        Rail & Road  17.90
4        Rail & Road        Car Rentals   6.29
5        Rail & Road    High-Speed Rail   6.18
6        Rail & Road         Tour Buses   5.43
7    Travel Services       Guided Tours   6.87
8               Root  Destination Types  35.23
9  Destination Types     Nature Tourism  20.76
✅ 数据已保存至 ./csv/sunburst\sunburst_Tourism and Hospitality_2.csv

📄 生成的数据样例：
                 parent                  child  value
0                  Root      Destination Types  29.12
1     Destination Types          Urban Tourism   8.58
2     Destination Types         Nature Tourism  20.54
3        Nature Tourism          Coastal Areas  13.53
4        Nature Tourism  Mountain Destinations   7.01
5        