# 绘制散点图

安装
```r
# Install the R package whose source tree is the current directory
# (repos = NULL + type = "source" means a local source install).
# NOTE(review): presumably run from a locuscomparer checkout, since that
# package is loaded later in this file -- confirm.
install.packages(".", repos = NULL, type = "source")
```

首先在 methy1 环境中，找到所有需要分析的 tsv 文件

In [None]:
import pandas as pd
import os
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize_scalar
from subprocess import call

# Work from the project directory so that the relative paths used later
# (the per-gene input CSV and the "paint.csv" output) resolve there.
os.chdir('/mnt/hpc/home/xuxinran/DirectSeq/9_image_of_graduation/2')

In [13]:
# Directory that holds one sub-directory of summary results per QTL type.
_qtl_root = '/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1'

# (QTL label, sub-directory name, tag embedded in the summary file name)
_qtl_layout = [
    ('inosine-QTL', 'Iqtl', 'I'),
    ('puQTL', 'puqtl', 'promoter'),
    ('m6A-QTL', 'm6Aqtl', 'm6A'),
    ('pseU-QTL', 'pseUqtl', 'pseU'),
    ('m5C-QTL', 'm5Cqtl', 'm5C'),
    ('stQTL', 'stqtl', 'stability'),
    ('3aQTL', '3aqtl', 'APA'),
    ('irQTL', 'irqtl', 'isoform'),
]

# Map each QTL label to the path of its summary CSV.
qtl_dict = {
    label: f'{_qtl_root}/{subdir}/nano_merge_{tag}_summary.csv'
    for label, subdir, tag in _qtl_layout
}

In [15]:
def bf_to_pvalue(bf, f, trait_type="quant", N=104, s=None):
    """
    Convert a Bayes factor into an approximate two-sided p-value.

    The approximate Bayes factor used here is
        lABF = 0.5 * (log(1 - r) + r * z**2),  r = sd_prior**2 / (sd_prior**2 + V)
    where V is the variance of the effect estimate. This relation is
    inverted in closed form,
        z**2 = (2 * log(bf) - log(1 - r)) / r,
    and the z-score is turned into a two-sided normal p-value.

    Parameters:
    - bf: Bayes Factor (not log)
    - f: allele frequency of the SNP (only f * (1 - f) is used, so the
      effect-allele and minor-allele frequency give the same result)
    - trait_type: 'quant' for a quantitative trait; any other value is
      treated as case-control ('cc')
    - N: sample size
    - s: proportion of samples that are cases (required for case-control)

    Returns:
    - Estimated p-value. NaN if `bf` or `f` is NaN, or if the variance is
      not finite (f equal to 0 or 1 as a numpy float). 1.0 when `bf` is
      smaller than the minimum attainable Bayes factor sqrt(1 - r), which
      corresponds to z = 0.

    Raises:
    - TypeError: if a case-control trait is requested without `s`.
    """
    if trait_type == "quant":
        sd_prior = 0.15
        # Variance of the effect estimate for a quantitative trait.
        V = 1 / (2 * N * f * (1 - f))
    else:
        if s is None:
            raise TypeError(
                "s (proportion of cases) is required for case-control traits"
            )
        sd_prior = 0.2
        # Variance of the effect estimate for case-control data.
        V = 1 / (2 * N * f * (1 - f) * s * (1 - s))

    # Shrinkage factor: share of the total variance attributed to the prior.
    r = sd_prior**2 / (sd_prior**2 + V)

    # Missing inputs (e.g. a SNP without allele frequency) or a degenerate
    # variance cannot be converted; propagate NaN instead of guessing.
    if np.isnan(bf) or not (0 < r < 1):
        return np.nan

    if bf > 0:
        # Closed-form inverse of the lABF equation; no numerical search needed.
        z_squared = (2 * np.log(bf) - np.log1p(-r)) / r
    else:
        # A non-positive BF is below every attainable value; closest is z = 0.
        z_squared = 0.0
    z = np.sqrt(max(z_squared, 0.0))

    # Survival function keeps precision for large z, where 1 - cdf(z)
    # would round to exactly 0.
    return 2 * norm.sf(z)

In [29]:
# Columns that together identify one SNP in both tables.
snp_keys = ['rsID', 'chrom', 'snp_pos_1base', 'strand']

# Per-SNP Bayes factors for the locus of interest; keep only the SNP
# identifiers and the two Bayes-factor columns.
df = pd.read_csv("ENSG00000276045.4_11.csv")
df = df[["rsID", "chrom","snp_pos_1base","strand","BF_m6A","BF_m5C"]]

# The m6A-QTL summary table is used here only to look up the allele
# frequency (EAF) of each SNP on chr12, "+" strand.
m6A_qtl = pd.read_csv(qtl_dict['m6A-QTL'])
m6A_qtl = m6A_qtl[(m6A_qtl['chrom']=="chr12")&(m6A_qtl['strand']=="+")]
# Remove duplicated SNP rows so the merge cannot fan out. Reassign rather
# than using inplace=True, which would modify a filtered slice and trigger
# pandas' SettingWithCopyWarning.
m6A_qtl = m6A_qtl.drop_duplicates(subset=snp_keys, keep='first')

# Attach the allele frequency to every SNP of the locus.
df_merged = pd.merge(df, m6A_qtl[snp_keys + ['EAF']], on=snp_keys, how='left')

# The left merge leaves EAF empty for SNPs absent from the summary table.
# No p-value can be derived for those rows (nor for rows without a Bayes
# factor), so drop them explicitly and report how many were removed.
incomplete = df_merged[['EAF', 'BF_m6A', 'BF_m5C']].isna().any(axis=1)
if incomplete.any():
    print(f"Dropping {int(incomplete.sum())} SNP(s) without EAF or Bayes factor")
df_merged = df_merged[~incomplete].copy()

# Convert both Bayes factors to p-values. A list comprehension is used
# instead of DataFrame.apply(axis=1) so that an empty table still yields
# an (empty) column rather than an assignment error.
for mod in ('m6A', 'm5C'):
    df_merged[f'pvalue_{mod}'] = [
        bf_to_pvalue(bf, eaf)
        for bf, eaf in zip(df_merged[f'BF_{mod}'], df_merged['EAF'])
    ]

# Disabled: make rsIDs unique by appending the row index.
# for i in range(0,len(df_merged)):
#     df_merged.at[i,'rsID_n'] = df_merged.at[i,'rsID'] + '_' + str(i)

# Write the table consumed by the R plotting step.
res = df_merged[['rsID','pvalue_m6A','pvalue_m5C']]
res.to_csv("paint.csv",index=False)

再用 methy_R 环境运行

```r
# Load the plotting dependencies.
library(locuscomparer, quietly = TRUE)
library(ggplot2)

# Read the p-value table ("paint.csv").
res <- read.csv("paint.csv")

# Build one association table from `res`: keep the SNP id and the chosen
# p-value column, renamed to the "rsid" / "pval" columns passed to
# locuscompare().
as_locus_input <- function(tbl, pval_col) {
  picked <- tbl[, c("rsID", pval_col)]
  names(picked) <- c("rsid", "pval")
  picked
}

in_fn1 <- as_locus_input(res, "pvalue_m6A")
in_fn2 <- as_locus_input(res, "pvalue_m5C")

# Draw the locuscompare figure for the two modifications.
p <- locuscompare(
  in_fn1 = in_fn1,
  in_fn2 = in_fn2,
  title1 = "m6A",
  title2 = "m5C"
)

# Save the figure as a PNG file.
ggsave("locuscompare_m6A_m5C.png", plot = p, device = "png")

```