In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
import seaborn as sns
from scipy.optimize import minimize_scalar
import argparse
import os
import pickle
from subprocess import call
import itertools

In [4]:
work_dir = "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR"
os.chdir(work_dir)
gene_bed_file = "/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/stqtl/gene_merge.bed"
apa_bed_file = "/mnt/hpc/home/xuxinran/DirectSeq/nano3aqtl/hg19.apadb_v2_final.sorted.bed"
isoform_bed_file = "/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/irqtl/nano_merge.annotated_transcripts.bed"
pu_bed_File = "/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/puqtl/promoter_final.bed"

In [5]:
def bf_to_pvalue(bf, f, trait_type, N, s=None):
    """
    Calculate p-value from Bayes Factor for a SNP given other parameters.

    Parameters:
    - bf: Bayes Factor (not log)
    - f: minor allele frequency (MAF)
    - trait_type: 'quant' for quantitative trait or 'cc' for case-control
    - N: sample size
    - s: proportion of samples that are cases (required if trait_type is 'cc')

    Returns:
    - Estimated p-value
    """
    def Var_data(f, N):
        """Variance calculation for quantitative trait."""
        return 1 / (2 * N * f * (1 - f))
    def Var_data_cc(f, N, s):
        """Variance calculation for case-control data."""
        return 1 / (2 * N * f * (1 - f) * s * (1 - s))
    if trait_type == "quant":
        sd_prior = 0.15
        V = Var_data(f, N)
    else:
        sd_prior = 0.2
        V = Var_data_cc(f, N, s)
    r = sd_prior**2 / (sd_prior**2 + V)
    def objective(z):
        lABF = 0.5 * (np.log(1 - r) + (r * z**2))
        return abs(np.exp(lABF) - bf)
    # Find the z-score that minimizes the difference between calculated BF and given BF
    result = minimize_scalar(objective)
    z = abs(result.x)  # Take absolute value as z-score is always positive
    # Calculate p-value from z-score
    p = 2 * (1 - norm.cdf(z))
    return p

In [6]:
def getSE(A1_m6A, A1_A, A2_m6A, A2_A):
    alpha1_post = 1 + A1_m6A
    beta1_post = 1 + A1_A
    alpha2_post = 1 + A2_m6A
    beta2_post = 1 + A2_A
    var_A1 = alpha1_post * beta1_post / ((alpha1_post + beta1_post)^2 * (alpha1_post + beta1_post + 1))
    var_A2 = alpha2_post * beta2_post / ((alpha2_post + beta2_post)^2 * (alpha2_post + beta2_post + 1))
    SE = np.sqrt(var_A1 + var_A2)
    return SE

In [4]:
qtl_dict = {
        'inosine-QTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/Iqtl/nano_merge_I_summary.csv',
        'puQTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/puqtl/nano_merge_promoter_summary.csv',
        'm6A-QTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/m6Aqtl/nano_merge_m6A_summary.csv',
        'pseU-QTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/pseUqtl/nano_merge_pseU_summary.csv',
        'm5C-QTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/m5Cqtl/nano_merge_m5C_summary.csv',
        # 'stQTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/stqtl/nano_merge_stability_summary.csv',
        '3aQTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/3aqtl/nano_merge_APA_summary.csv',
        'irQTL':'/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/irqtl/nano_merge_isoform_summary.csv'
    }

In [None]:
for qtl_type,qtl_file in qtl_dict.items():
    print(qtl_type)
    load_and_filter_qtl(qtl_file,qtl_type,work_dir, gene_bed_file, apa_bed_file, isoform_bed_file, pu_bed_File)

inosine-QTL


  return abs(np.exp(lABF) - bf)


puQTL
m6A-QTL


  return abs(np.exp(lABF) - bf)
  tmp2 = (x - v) * (fx - fw)


pseU-QTL


  qtl_data = pd.read_csv(qtl_file)
  return abs(np.exp(lABF) - bf)
  tmp2 = (x - v) * (fx - fw)


In [24]:
# stQTL求SE
def analyze_snp_stability_MannWhitneyU(A1_stability, A2_stability):
    total_count = len(A1_stability) + len(A2_stability)
    A1_count = len(A1_stability)
    A2_count = len(A2_stability)
    if (total_count < 10) or (A1_count == 0) or (A2_count == 0):
        return None,None
    stat, p_value = mannwhitneyu(A1_stability, A2_stability, alternative='two-sided')
    dominance = 0
    for x in A1_stability:
        for y in A2_stability:
            if x > y:
                dominance += 1
            elif x < y:
                dominance -= 1
    delta = dominance / (A1_count * A2_count)
    return p_value, delta

def bootstrap_delta_se(A1_stability, A2_stability, n_boot=100):
    A1_stability = eval(A1_stability)
    A2_stability = eval(A2_stability)
    delta_values = []
    for _ in range(n_boot):
        # 从原始数据中随机抽样
        A1_sample = np.random.choice(A1_stability, size=len(A1_stability), replace=True)
        A2_sample = np.random.choice(A2_stability, size=len(A2_stability), replace=True)
        # 计算 delta 值
        _, delta = analyze_snp_stability_MannWhitneyU(A1_sample, A2_sample)
        delta_values.append(delta)
    # 计算 SE
    se = np.std(delta_values)
    return se

In [41]:
def load_and_filter_qtl(qtl_file,qtl_type,work_dir, gene_bed_file, apa_bed_file, isoform_bed_file, pu_bed_File):
    if qtl_type == "stQTL":
        qtl_data = pd.read_csv(qtl_file)
        qtl_data['se'] = qtl_data.apply(lambda row: bootstrap_delta_se(row['A1_STscore_l'], row['A2_STscore_l']), axis=1)
        new_names = {'pvalue': f'{qtl_type}_pvalue', 'se': f'{qtl_type}_se', 'Beta': f'{qtl_type}_Beta'}
        qtl_data = qtl_data.rename(columns=new_names)
        qtl_data = qtl_data[["chrom","strand","snp_pos","rsID","A1","A2","EAF",f"{qtl_type}_Beta",f"{qtl_type}_pvalue",f"{qtl_type}_se"]]
    elif qtl_type in ['m6A-QTL','pseU-QTL','m5C-QTL','inosine-QTL']:
        qtl_data = pd.read_csv(qtl_file)
        qtl_data['Pvalue'] = qtl_data.apply(lambda row: bf_to_pvalue(row['BayesFactor'], row['EAF'], "quant", 104), axis=1)
        qtl_data['se'] = qtl_data.apply(lambda row: getSE(row.iloc[10], row.iloc[8], row.iloc[11], row.iloc[9]), axis=1)
        qtl_data = qtl_data.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 17, 18, 19]]
        qtl_data.columns = ["chrom","strand","snp_pos",f"{qtl_type}_mod_pos","rsID","A1","A2","EAF",f"{qtl_type}_Beta",f"{qtl_type}_pvalue",f"{qtl_type}_se"]
    else:
        qtl_data = pd.read_csv(qtl_file)
        qtl_data['Pvalue'] = qtl_data.apply(lambda row: bf_to_pvalue(row['BayesFactor'], row['EAF'], "quant", 104), axis=1)
        qtl_data['se'] = qtl_data.apply(lambda row: getSE(row.iloc[7], row.iloc[9], row.iloc[8], row.iloc[10]), axis=1)
        qtl_data = qtl_data.iloc[:, [0,1,2,3,4,5,6,11,15,16,17]]
        qtl_data.columns = ["chrom","strand","snp_pos","rsID","A1","A2","EAF",f"{qtl_type}_id",f"{qtl_type}_Beta",f"{qtl_type}_pvalue",f"{qtl_type}_se"]
    qtl_data_copy = qtl_data.copy()
    save_esi(qtl_data_copy, qtl_type)
    qtl_data_copy = qtl_data.copy()
    save_esd(qtl_data_copy, qtl_type)
    qtl_data_copy = qtl_data.copy()
    save_epi_flist(qtl_data_copy, qtl_type, work_dir, gene_bed_file, apa_bed_file, isoform_bed_file, pu_bed_File)


In [40]:
def save_esi(df, qtl_type):
    df['chrom'] = df['chrom'].str.replace('chr', '', regex=False)
    df['rsID'] = df.apply(lambda row: f"{row['chrom']}:{str(row['snp_pos'])}" if row['rsID'] == "." else row['rsID'], axis=1)
    df['LD_pos'] = 0
    res_df = df[["chrom","rsID","LD_pos","snp_pos","A1","A2"]]
    res_df.to_csv(f'{qtl_type}.esi', index=False, sep="\t", header=False)

In [44]:
def save_esd(df, qtl_type):
    df['chrom'] = df['chrom'].str.replace('chr', '', regex=False)
    df['rsID'] = df.apply(lambda row: f"{row['chrom']}:{str(row['snp_pos'])}" if row['rsID'] == "." else row['rsID'], axis=1)
    if qtl_type in ['m6A-QTL','pseU-QTL','m5C-QTL','inosine-QTL']:
        dfs = df.groupby(f"{qtl_type}_mod_pos")
        for name, group in dfs:
            res_df = group[["chrom","rsID","snp_pos","A1","A2","EAF",f"{qtl_type}_Beta",f"{qtl_type}_se",f"{qtl_type}_pvalue"]]
            res_df.columns = ["Chr","SNP","Bp","A1","A2","Freq","Beta","se","p"]
            res_df.to_csv(f'{qtl_type}_{name}.esd', index=False, sep="\t", header=False)
    if qtl_type in ['puQTL','3aQTL','irQTL']:
        dfs = df.groupby(f"{qtl_type}_id")
        for name, group in dfs:
            res_df = group[["chrom","rsID","snp_pos","A1","A2","EAF",f"{qtl_type}_Beta",f"{qtl_type}_se",f"{qtl_type}_pvalue"]]
            res_df.columns = ["Chr","SNP","Bp","A1","A2","Freq","Beta","se","p"]
            res_df.to_csv(f'{qtl_type}_{name}.esd', index=False, sep="\t", header=False)


In [47]:
def save_epi_flist(df, qtl_type, work_dir,gene_bed_file,apa_bed_file,isoform_bed_file,pu_bed_file):
    if qtl_type in ['m6A-QTL','pseU-QTL','m5C-QTL','inosine-QTL']:
        df['s'] = df[f"{qtl_type}_mod_pos"].astype(int)-1
        df['e'] = df[f"{qtl_type}_mod_pos"].astype(int)+1
        df['score'] = 0
        bed_df = df[["chrom","s","e",f"{qtl_type}_mod_pos","score","strand"]]
        bed_df.to_csv(f'{qtl_type}_tmp.bed', sep='\t', index=False, header=False)
        call(f"bedtools intersect -a {qtl_type}_tmp.bed -b {gene_bed_file} -wa -wb -s > {qtl_type}_gene.bed", shell=True)
        gene_df = pd.read_csv(f'{qtl_type}_gene.bed', sep='\t', header=None, usecols=[0,3,5,9])
        gene_df.columns = ["chrom","mod_pos","strand","gene_id"]
        gene_df["name"] = gene_df['chrom']+"_"+gene_df['mod_pos'].astype(str)
        gene_df['chrom'] = gene_df['chrom'].str.replace('chr', '', regex=False)
        gene_df["LD_pos"] = 0
        res_df = gene_df[["chrom","name","LD_pos","mod_pos","gene_id","strand"]]
        call(f"rm {qtl_type}_tmp.bed", shell=True)
        flist_df = res_df.copy()
        flist_df['PathOfEsd'] = f'{work_dir}/{qtl_type}_'+ flist_df['mod_pos'].astype(str) +'.esd'
        flist_df.columns = ["Chr","ProbeID","GeneticDistance","ProbeBp","Gene","Orientation",'PathOfEsd']
        flist_df.to_csv(f'{qtl_type}.flist', sep='\t', index=False)
    elif qtl_type == "stQTL":
        df['s'] = df["snp_pos"].astype(int)-1
        df['e'] = df["snp_pos"].astype(int)+1
        df['score'] = 0
        df['rsID'] = df.apply(lambda row: f"{row['chrom']}:{str(row['snp_pos'])}" if row['rsID'] == "." else row['rsID'], axis=1)
        bed_df = df[["chrom","s","e","rsID","score","strand"]]
        bed_df.to_csv(f'{qtl_type}_tmp.bed', sep='\t', index=False, header=False)
        call(f"bedtools intersect -a {qtl_type}_tmp.bed -b {gene_bed_file} -wa -wb -s > {qtl_type}_gene.bed", shell=True)
        gene_df = pd.read_csv(f'{qtl_type}_gene.bed', sep='\t', header=None, usecols=[3,7,9])
        gene_df.columns = ["rsID","gene_start","gene_id"]
        qtl_data = pd.merge(qtl_data, gene_df, on="rsID", how="left")
        # make esd
        qtl_data['chrom'] = qtl_data['chrom'].str.replace('chr', '', regex=False)
        dfs = qtl_data.groupby('gene_id')
        for name, group in dfs:
            res_df = group[["chrom","rsID","snp_pos","A1","A2","EAF",f"{qtl_type}_Beta",f"{qtl_type}_se",f"{qtl_type}_pvalue"]]
            res_df.columns = ["Chr","SNP","Bp","A1","A2","Freq","Beta","se","p"]
            res_df.to_csv(f'{qtl_type}_{name}.esd', index=False, sep="\t", header=False)
        # make epi
        qtl_data["LD_pos"] = 0
        res_df = qtl_data[["chrom","rsID","LD_pos","gene_start","gene_id","strand"]]
        call(f"rm {qtl_type}_tmp.bed", shell=True)
        # make flist
        flist_df = res_df.copy()
        flist_df['PathOfEsd'] = f'{work_dir}/{qtl_type}_'+ flist_df['gene_id'] +'.esd'
        flist_df.columns = ["Chr","ProbeID","GeneticDistance","ProbeBp","Gene","Orientation",'PathOfEsd']
        flist_df.to_csv(f'{qtl_type}.flist', sep='\t', index=False)
    elif qtl_type == "3aQTL":
        call(f"bedtools intersect -a {apa_bed_file} -b {gene_bed_file} -wa -wb -s > apa_gene.bed",shell=True)
        apa_df = pd.read_csv("apa_gene.bed",sep="\t",header=None,usecols=[0,1,3,5,13],names=["chrom","apa_s","apa_id","strand","gene_id"])
        apa_df = apa_df[apa_df['apa_id'].isin(df[f"{qtl_type}_id"])]
        apa_df['chrom'] = apa_df['chrom'].str.replace('chr', '', regex=False)
        apa_df['LD_pos'] = 0
        res_df = apa_df[["chrom","apa_id","LD_pos","apa_s","gene_id","strand"]]
        call(f"rm apa_gene.bed",shell=True)
        flist_df = res_df.copy()
        flist_df['PathOfEsd'] = f'{work_dir}/{qtl_type}_'+ flist_df['apa_id'] +'.esd'
        flist_df.columns = ["Chr","ProbeID","GeneticDistance","ProbeBp","Gene","Orientation",'PathOfEsd']
        flist_df.to_csv(f'{qtl_type}.flist', sep='\t', index=False)
    elif qtl_type == "irQTL":
        call(f"bedtools intersect -a {isoform_bed_file} -b {gene_bed_file} -wa -wb -s > isoform_gene.bed",shell=True)
        isoform_df = pd.read_csv("isoform_gene.bed",sep="\t",header=None,usecols=[0,1,3,5,15],names=["chrom","isoform_s","isoform_id","strand","gene_id"])
        isoform_df = isoform_df[isoform_df['isoform_id'].isin(df[f"{qtl_type}_id"])]
        isoform_df['chrom'] = isoform_df['chrom'].str.replace('chr', '', regex=False)
        isoform_df['LD_pos'] = 0
        res_df = isoform_df[["chrom","isoform_id","LD_pos","isoform_s","gene_id","strand"]]
        call(f"rm isoform_gene.bed",shell=True)
        flist_df = res_df.copy()
        flist_df['PathOfEsd'] = f'{work_dir}/{qtl_type}_'+ flist_df['isoform_id'] +'.esd'
        flist_df.columns = ["Chr","ProbeID","GeneticDistance","ProbeBp","Gene","Orientation",'PathOfEsd']
        flist_df.to_csv(f'{qtl_type}.flist', sep='\t', index=False)
    else:
        call(f"bedtools intersect -a {pu_bed_file} -b {gene_bed_file} -wa -wb -s > promoter_gene.bed",shell=True)
        pu_df = pd.read_csv("promoter_gene.bed",sep="\t",header=None,usecols=[0,1,3,5,9],names=["chrom","pu_s","pu_id","strand","gene_id"])
        pu_df = pu_df[pu_df['pu_id'].isin(df[f"{qtl_type}_id"])]
        pu_df['chrom'] = pu_df['chrom'].str.replace('chr', '', regex=False)
        pu_df['LD_pos'] = 0
        res_df = pu_df[["chrom","pu_id","LD_pos","pu_s","gene_id","strand"]]
        call(f"rm promoter_gene.bed",shell=True)
        flist_df = res_df.copy()
        flist_df['PathOfEsd'] = f'{work_dir}/{qtl_type}_'+ flist_df['pu_id'] +'.esd'
        flist_df.columns = ["Chr","ProbeID","GeneticDistance","ProbeBp","Gene","Orientation",'PathOfEsd']
        flist_df.to_csv(f'{qtl_type}.flist', sep='\t', index=False)
    res_df.to_csv(f'{qtl_type}.epi', sep='\t', index=False, header=False)

In [8]:
df = pd.read_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/3aQTL/3aQTL_all.flist",sep="\t")
df = df.head(10000)
df.to_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/3aQTL/3aQTL.flist",sep="\t",index=False)

df = pd.read_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/3aQTL/3aQTL_all.epi",sep="\t")
df = df.head(10000)
df.to_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/3aQTL/3aQTL.epi",sep="\t",index=False)

In [None]:
df = pd.read_csv("/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/puqtl/promoter_final.bed",sep="\t")
# fourth_column = df.iloc[:, 3]
# duplicate_rows = df[fourth_column.duplicated(keep=False)]
# duplicate_rows = df[df.duplicated(subset=['PathOfEsd'], keep=False)]
df = df[df['PathOfEsd']=='/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/irQTL/irQTLENST0000071827811ENSG000001155391510.esd']
df


Unnamed: 0,Chr,ProbeID,GeneticDistance,ProbeBp,Gene,Orientation,PathOfEsd
1229,2,ENST00000718278.1-1-ENSG00000115539.15-10,0,101179454,ENSG00000238328.1,+,/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/M...
1230,2,ENST00000718278.1-1-ENSG00000115539.15-10,0,101179454,ENSG00000115539.14_9,+,/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/M...


In [None]:
df = pd.read_csv('/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1/irqtl/nano_merge.annotated_transcripts.bed',sep="\t",header=None)
fourth_column = df.iloc[:, 3]
df = df[fourth_column=='ENST00000718278.1-1,ENSG00000115539.15-10']
dfENST00000718278.1_1_ENSG00000115539.15_10

ENST00000718278.1_1_ENSG00000115539.15_10

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11


In [5]:
dict7 = {
    "m6AQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL/m6AQTL",
    "pseUQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/pseUQTL/pseUQTL",
    "m5CQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m5CQTL/m5CQTL",
    "inosineQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/inosineQTL/inosineQTL",
    "puQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/puQTL/puQTL",
    "3aQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/3aQTL/3aQTL",
    "irQTL": "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/irQTL/irQTL",
}

In [6]:
def cross_combinations(list1, list2):
    result = []
    for x, y in itertools.product(list1, list2):
        if x != y:  # 添加判断条件，跳过 x 和 y 相同的组合
            result.append(f"{x}-{y}")
    for y, x in itertools.product(list2, list1):
        if y != x: # 添加判断条件，跳过 y 和 x 相同的组合
            result.append(f"{y}-{x}")
    return result

qtl_types = list(dict7.keys())
result = cross_combinations(qtl_types, qtl_types)
work_dir = '/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR'
for i in result:
    qtl1 = i.split("-")[0]
    qtl2 = i.split("-")[1]
    qtl1_file = dict7[qtl1]
    qtl2_file = dict7[qtl2]
    print(f'/mnt/hpc/home/xuxinran/GWAS/smr-1.3.1-linux-x86_64/smr --bfile /mnt/hpc/home/xuxinran/huvec_genotype/huvec_imputed --beqtl-summary {qtl1_file} --beqtl-summary {qtl2_file} --out {work_dir}/{qtl1}_{qtl2} --thread-num 10 ')


/mnt/hpc/home/xuxinran/GWAS/smr-1.3.1-linux-x86_64/smr --bfile /mnt/hpc/home/xuxinran/huvec_genotype/huvec_imputed --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL/m6AQTL --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/pseUQTL/pseUQTL --out /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL_pseUQTL --thread-num 10 
/mnt/hpc/home/xuxinran/GWAS/smr-1.3.1-linux-x86_64/smr --bfile /mnt/hpc/home/xuxinran/huvec_genotype/huvec_imputed --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL/m6AQTL --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m5CQTL/m5CQTL --out /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL_m5CQTL --thread-num 10 
/mnt/hpc/home/xuxinran/GWAS/smr-1.3.1-linux-x86_64/smr --bfile /mnt/hpc/home/xuxinran/huvec_genotype/huvec_imputed --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/m6AQTL/m6AQTL --beqtl-summary /mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/inosineQTL/inosineQTL --out 

In [17]:
## 结果解读 处理SMR的结果
import pandas as pd
import os

def merge_smr_files(folder_path):
    all_data = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".smr"):
            file_path = os.path.join(folder_path, filename)
            qtl1 = filename.split("_")[0]
            qtl2 = filename.split("_")[1].replace(".smr","")
            try:
                df = pd.read_csv(file_path, sep='\t',usecols=['Expo_ID','Expo_Chr','Expo_Gene','Outco_ID','Outco_Chr','Outco_Gene','topSNP','b_SMR', 'se_SMR', 'p_SMR'])
                n = len(df)
                if n == 0:
                    continue
                df = df[df['p_SMR']<(0.05/n)]
                df['Expo_type'] = qtl1
                df['Outco_type'] = qtl2
                if df.empty:
                    continue
                all_data.append(df)
            except Exception as e:
                print(f"读取文件 {filename} 时出错: {e}")

    if all_data:
        merged_df = pd.concat(all_data, ignore_index=True)
        return merged_df

# 示例用法
folder_path = "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/"  # 替换为您的文件夹路径


a = merge_smr_files(folder_path)


In [18]:
a

Unnamed: 0,Expo_ID,Expo_Chr,Expo_Gene,Outco_ID,Outco_Chr,Outco_Gene,topSNP,b_SMR,se_SMR,p_SMR,Expo_type,Outco_type
0,TMEM43-1,3,ENSG00000170876.9_8;ENSG00000268279.4_14;ENSG0...,chr3-14184315,3,ENSG00000170876.9_8;ENSG00000268279.4_14;ENSG0...,rs116911972,-0.727548,0.172006,0.000023,puQTL,m6AQTL
1,TMEM43-3-TMEM43-2,3,ENSG00000170876.9_8;ENSG00000268279.4_14;ENSG0...,chr3-14184315,3,ENSG00000170876.9_8;ENSG00000268279.4_14;ENSG0...,rs116911972,0.727548,0.172006,0.000023,puQTL,m6AQTL
2,PXN-2,12,ENSG00000089159.17_19,chr12-120650014,12,ENSG00000089159.17_19,rs4767884,0.882719,0.218328,0.000053,puQTL,m6AQTL
3,PXN-1,12,ENSG00000089159.17_19,chr12-120650014,12,ENSG00000089159.17_19,rs4767884,-0.881969,0.217827,0.000051,puQTL,m6AQTL
4,BCL2L2-1-BCL2L2-2-BCL2L2-3,14,ENSG00000129473.11_16;ENSG00000258643.6_14;ENS...,chr14-23790840,14,ENSG00000129473.11_16;ENSG00000258643.6_14;ENS...,rs2295126,1.379020,0.377352,0.000258,puQTL,m6AQTL
...,...,...,...,...,...,...,...,...,...,...,...,...
1048,PXN-1,12,ENSG00000089159.17_19,chr12-120651706,12,ENSG00000089159.17_19,rs4767884,-0.617075,0.159256,0.000107,puQTL,pseUQTL
1049,PXN-2,12,ENSG00000089159.17_19,chr12-120652777,12,ENSG00000089159.17_19,rs4767884,0.618000,0.159211,0.000104,puQTL,pseUQTL
1050,PXN-1,12,ENSG00000089159.17_19,chr12-120652777,12,ENSG00000089159.17_19,rs4767884,-0.617476,0.158864,0.000102,puQTL,pseUQTL
1051,BCL2L2-1-BCL2L2-2-BCL2L2-3,14,ENSG00000129473.11_16;ENSG00000258643.6_14;ENS...,chr14-23791019,14,ENSG00000129473.11_16;ENSG00000258643.6_14;ENS...,rs2295126,1.263460,0.310626,0.000048,puQTL,pseUQTL


In [19]:
a.to_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/MR/smr_result.csv",index=False)

https://yanglab.westlake.edu.cn/software/smr/#SMRlocusplot19
画图