# In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def plot_cumulative_proportion(csv_file, pdf_file, mod_name, *,
                               thresholds=(3, 10, 50), max_distance_kb=100):
    """Plot cumulative distance curves between QTLs and modification sites.

    Reads columns 0, 1, 2, 3 and 14 (by position) from ``csv_file`` and
    treats them as ``chrom``, ``strand``, ``snp_pos``, ``mod_pos`` and
    ``BayesFactor``. For every cutoff in ``thresholds`` the rows with a
    Bayes factor strictly greater than the cutoff are kept, their distances
    are sorted, and the empirical cumulative proportion is drawn as one
    curve. The finished figure is written to ``pdf_file`` as a PDF.

    Args:
        csv_file: Path (or file-like object) of the summary CSV.
        pdf_file: Destination of the PDF figure.
        mod_name: Modification name shown in the x-axis label.
        thresholds: Bayes-factor cutoffs; one curve is drawn per cutoff.
        max_distance_kb: Rows whose distance exceeds this value are dropped
            before plotting.

    Returns:
        None. The only output is the PDF file.
    """
    df = pd.read_csv(csv_file, usecols=[0, 1, 2, 3, 14])

    # Give the selected columns uniform names regardless of the file header.
    df.columns = ['chrom', 'strand', 'snp_pos', 'mod_pos', 'BayesFactor']

    # Distance between the QTL and the modification site, divided by 1000
    # (kb, assuming the positions are in bp -- TODO confirm against the input).
    df['distance'] = (df['snp_pos'] - df['mod_pos']).abs() / 1000
    df = df[df['distance'] <= max_distance_kb]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for threshold in thresholds:
            # Keep only the rows passing the current Bayes-factor cutoff.
            distances = df.loc[df['BayesFactor'] > threshold,
                               'distance'].sort_values()

            # Empirical CDF: the i-th smallest distance maps to i / n.
            # max(n, 1) avoids a zero divisor when nothing passes the cutoff;
            # the numerator is empty in that case, so the result is unchanged.
            n = len(distances)
            cumulative_proportion = np.arange(1, n + 1) / max(n, 1)

            ax.plot(distances, cumulative_proportion,
                    label=f'BF > {threshold}')

        # Axis labels, legend and grid.
        ax.set_xlabel(f'Distance between QTL and {mod_name} (kb)')
        ax.set_ylabel('Cumulative Proportion')
        ax.legend()
        ax.grid(True)

        # Save the figure as a PDF.
        fig.savefig(pdf_file, format='pdf')
    finally:
        # Always release the figure, even if plotting or saving failed, so
        # it is neither displayed nor kept alive in pyplot's figure registry.
        plt.close(fig)

# Input summary tables and output figure locations, one pair per modification.
outdir = "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/mod_snp_distance"

# Replace the paths below with the locations of your own CSV files.
m6A_file = '/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/m6Aqtl/nano_merge_m6A_summary.csv'
m6A_pdf = f'{outdir}/m6A_dis.pdf'

m5C_file = '/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/m5Cqtl/nano_merge_m5C_summary.csv'
m5C_pdf = f'{outdir}/m5C_dis.pdf'

pseU_file = '/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/pseUqtl/nano_merge_pseU_summary.csv'
pseU_pdf = f'{outdir}/pseU_dis.pdf'

inosine_file = '/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/Iqtl/nano_merge_I_summary.csv'
inosine_pdf = f'{outdir}/i_dis.pdf'

# Draw one figure per modification, in the same order as before.
for summary_csv, figure_pdf, label in (
    (m6A_file, m6A_pdf, "m6A"),
    (m5C_file, m5C_pdf, "m5C"),
    (pseU_file, pseU_pdf, "pseU"),
    (inosine_file, inosine_pdf, "Inosine"),
):
    plot_cumulative_proportion(summary_csv, figure_pdf, label)