# FigureYa196Plus_PanPie_py

**Author:** Ying Ge

In [None]:
from datetime import datetime
from IPython.display import Markdown

# Format the current local date as YYYY-MM-DD and wrap it in a bold Markdown
# label; as the last expression of the cell, the Markdown object is what the
# notebook is expected to render.
current_date = datetime.now().strftime("%Y-%m-%d")
Markdown(f"**Date**: {current_date}")

## Academic Citation
If you use this code in your work or research, we kindly request that you cite our publication:

Xiaofan Lu, et al. (2025). FigureYa: A Standardized Visualization Framework for Enhancing Biomedical Data Interpretation and Research Efficiency. iMetaMed. https://doi.org/10.1002/imm3.70005

## 需求描述
## Requirement Description

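绘制多个饼图展示不同临床预后分期的分布差异，用 Python 实现。

Draw multiple pie charts showing how clinical/prognostic stage categories are distributed differently between groups, implemented in Python.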

![](example.png)

出自 / Source: <https://www.nature.com/articles/s41388-019-1026-9>

Fig. 4 Identification of CNV-driven rRNA metabolism-related genes with clinical relevance.
f, g Pie charts showing the Chi-squared test of clinicopathologic factors for PRE in CRC (f) and LUAD (g) tumor samples from the TCGA. 

## 应用场景
## Application Scenarios
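搞清楚输入数据跟图的对应关系、理解每部分代码所画的内容，就可以套用到更多类型的数据上。

Once you understand how the input data maps onto the figure and what each part of the code draws, the same approach can be applied to many other kinds of data.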

## 环境设置
## Environment Configuration

In [None]:
pip install pandas matplotlib seaborn scipy numpy

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import fisher_exact
import seaborn as sns
from matplotlib.patches import Circle
import warnings
warnings.filterwarnings('ignore')
 
def _collect_pie_tables(dat, gname, vname):
    """Build per-variable pie tables and association p-values.

    For every column in ``vname`` a contingency table against ``dat[gname]``
    is computed. A 2x2 table is tested with Fisher's exact test; any other
    non-empty shape falls back to the chi-square test of independence. An
    empty table (no usable observations) yields a NaN p-value.

    Returns:
        Tuple ``(pie_high, pie_low, fisher_p)`` of dicts keyed by variable
        name. The first two hold DataFrames with columns ``Risk``,
        ``Category``, ``Freq`` and ``Pct`` for the ``'High'`` and ``'Low'``
        rows of the contingency table (empty if that level is absent); the
        third holds the p-value.
    """
    # Imported once here instead of on every loop iteration.
    from scipy.stats import chi2_contingency

    pie_columns = ['Risk', 'Category', 'Freq', 'Pct']
    pie_high = {}
    pie_low = {}
    fisher_p = {}

    for var in vname:
        # Contingency table: rows are risk levels, columns are categories.
        crosstab = pd.crosstab(dat[gname], dat[var])

        if crosstab.size == 0:
            # Nothing to test (e.g. the variable is entirely missing).
            p_value = float('nan')
        elif crosstab.shape == (2, 2):
            _, p_value = fisher_exact(crosstab)
        else:
            # Fisher's exact test here only covers 2x2 tables.
            _, p_value, _, _ = chi2_contingency(crosstab)
        fisher_p[var] = p_value

        # Per-risk-level counts and within-level proportions.
        records = []
        for risk_level in ('High', 'Low'):
            if risk_level not in crosstab.index:
                continue
            row_data = crosstab.loc[risk_level]
            total = row_data.sum()
            records.extend(
                {
                    'Risk': risk_level,
                    'Category': category,
                    'Freq': freq,
                    'Pct': freq / total,
                }
                for category, freq in row_data.items()
            )

        # Explicit columns keep the 'Risk' filter valid even with no records.
        pie_df = pd.DataFrame(records, columns=pie_columns)
        pie_high[var] = pie_df[pie_df['Risk'] == 'High'].copy()
        pie_low[var] = pie_df[pie_df['Risk'] == 'Low'].copy()

    return pie_high, pie_low, fisher_p


def _draw_risk_pie(ax, pie_df, palette, title, empty_title):
    """Draw one pie chart on ``ax``, or a 'No Data' placeholder if empty."""
    if pie_df.empty:
        ax.text(0.5, 0.5, 'No Data', ha='center', va='center')
        ax.set_title(empty_title)
        return

    ax.pie(
        pie_df['Pct'],
        labels=pie_df['Category'],
        colors=palette[:len(pie_df)],
        autopct='%1.1f%%',
        startangle=90,
    )
    ax.set_title(title, fontsize=12, fontweight='bold')


def figureya_pan_pie(input_file="easy_input.txt", output_prefix="pan_pie"):
    """Plot paired High/Low risk pie charts for every variable in a table.

    The tab-separated ``input_file`` is read with its first column as the
    index. The ``Risk`` column is the grouping variable; every other column
    is treated as a categorical variable. Each variable gets two vertically
    stacked pies (High on top, Low below), laid out at most four variables
    per row. The High pie's title carries the p-value of the association
    test between ``Risk`` and the variable.

    Args:
        input_file: Path of the tab-separated input table.
        output_prefix: Prefix of the saved figure; ``<prefix>.png`` (300 dpi)
            and ``<prefix>.pdf`` are written.

    Returns:
        Tuple ``(pie_high, pie_low, fisher_p)`` of dicts keyed by variable
        name: the High-risk pie table, the Low-risk pie table and the
        p-value.

    Raises:
        ValueError: If the table has no column other than ``Risk``.
    """
    # Read the data
    dat = pd.read_csv(input_file, sep='\t', index_col=0)
    print("数据预览:")
    print(dat.head())

    # Grouping column and the variables to plot
    gname = "Risk"
    vname = [col for col in dat.columns if col != gname]
    if not vname:
        # Otherwise the grid computation below would divide by zero.
        raise ValueError(
            f"{input_file!r} contains no columns besides {gname!r} to plot"
        )

    pie_high, pie_low, fisher_p = _collect_pie_tables(dat, gname, vname)

    # Grid layout: up to 4 variables per row, two subplot rows per variable row
    n_vars = len(vname)
    cols = min(4, n_vars)
    rows = (n_vars + cols - 1) // cols

    # squeeze=False always yields a 2-D axes array, so a single variable or a
    # single column needs no special-case reshaping.
    fig, axes = plt.subplots(
        rows * 2, cols, figsize=(cols * 4, rows * 6), squeeze=False
    )

    # Color schemes
    colors_high = sns.color_palette("Reds", n_colors=10)
    colors_low = sns.color_palette("Blues", n_colors=10)

    for i, var in enumerate(vname):
        row = (i // cols) * 2
        col = i % cols

        # High-risk pie (top), annotated with the test p-value
        _draw_risk_pie(
            axes[row, col],
            pie_high[var],
            colors_high,
            f'{var} - High Risk\np = {fisher_p[var]:.3f}',
            f'{var} - High Risk (No Data)',
        )

        # Low-risk pie (bottom)
        _draw_risk_pie(
            axes[row + 1, col],
            pie_low[var],
            colors_low,
            f'{var} - Low Risk',
            f'{var} - Low Risk (No Data)',
        )

    # Hide the unused subplot pairs in the last grid row
    for i in range(n_vars, rows * cols):
        row = (i // cols) * 2
        col = i % cols
        axes[row, col].set_visible(False)
        axes[row + 1, col].set_visible(False)

    plt.tight_layout()
    plt.savefig(f'{output_prefix}.png', dpi=300, bbox_inches='tight')
    plt.savefig(f'{output_prefix}.pdf', bbox_inches='tight')
    plt.show()

    # Print the test results with significance stars
    print("\nFisher精确检验结果:")
    for var, p in fisher_p.items():
        significance = "***" if p < 0.001 else "**" if p < 0.01 else "*" if p < 0.05 else "ns"
        print(f"{var}: p = {p:.6f} {significance}")

    return pie_high, pie_low, fisher_p
 
# 创建测试数据
def create_test_data(output_file='easy_input.txt', n_samples=200, seed=42):
    """Write a synthetic, tab-separated example input table.

    The table has one row per sample (index ``Sample_1`` .. ``Sample_<n>``)
    and the columns ``Risk``, ``Stage``, ``Grade``, ``Gender`` and
    ``Age_Group``, each drawn uniformly at random from a fixed set of labels.
    With the default arguments the output is the same file the original
    hard-coded version produced.

    Args:
        output_file: Destination path; an existing file is overwritten.
        n_samples: Number of sample rows to generate.
        seed: Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.
    """
    np.random.seed(seed)

    # Columns are drawn in this fixed order so a given seed always yields
    # the same table.
    data = {
        'Risk': np.random.choice(['High', 'Low'], n_samples),
        'Stage': np.random.choice(['I', 'II', 'III', 'IV'], n_samples),
        'Grade': np.random.choice(['G1', 'G2', 'G3'], n_samples),
        'Gender': np.random.choice(['Male', 'Female'], n_samples),
        'Age_Group': np.random.choice(['<=60', '>60'], n_samples),
    }

    df = pd.DataFrame(data)
    df.index = [f'Sample_{i+1}' for i in range(n_samples)]
    df.to_csv(output_file, sep='\t')
    print(f"测试数据已创建: {output_file}")
 
# 使用示例
if __name__ == "__main__":
    from pathlib import Path

    # Generate synthetic test data only when no input file exists, so a real
    # easy_input.txt is never overwritten by the example.
    if not Path("easy_input.txt").exists():
        create_test_data()

    # Run the analysis
    pie_high, pie_low, fisher_p = figureya_pan_pie()

## Session Info

In [None]:
import IPython
# Print the environment summary returned by IPython.sys_info() so the session
# that produced the figure is recorded alongside the output.
print(IPython.sys_info())

!jupyter nbconvert --to html FigureYa196Plus_PanPie_py.ipynb