In [5]:
import pandas as pd
from subprocess import call

In [13]:
def mod2bed(filepath,outpath):
    """Convert a CSV of modification sites into a headerless, tab-separated BED6 file.

    Parameters
    ----------
    filepath : str
        Input CSV; must contain the columns ``chrom``, ``pos_1base`` and
        ``strand`` (any other columns are ignored).
    outpath : str
        Destination of the BED file. Columns written, in order:
        chrom, start, end, name, score, strand.

    Notes
    -----
    BED intervals are 0-based and half-open, so the single base at 1-based
    position ``p`` is written as ``[p - 1, p)``. The 1-based position itself
    is kept in the ``name`` column and ``score`` is a constant 0 placeholder.
    """
    sites = pd.read_csv(filepath, usecols=['chrom', 'pos_1base', 'strand'])
    bed = pd.DataFrame({
        'chrom': sites['chrom'],
        's': sites['pos_1base'] - 1,   # 0-based start
        'e': sites['pos_1base'],       # exclusive end -> exactly one base wide
        'pos_1base': sites['pos_1base'],
        'score': 0,
        'strand': sites['strand'],
    })
    bed.to_csv(outpath, sep="\t", index=False, header=False)


In [14]:
# Export one BED file per modification type. The input CSVs all follow the
# same "<mod>qtl/nano_merge_<mod>_sites.csv" layout under the merge directory.
sites_root = "/mnt/hpc/home/xuxinran/DirectSeq/data/zhaolin_240206/240201-zhaolin-RNA-merge/v0.8.1"
bed_dir = "/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/GeneFunctionalPathways"
for mod in ("m6A", "m5C", "pseU", "I"):
    mod2bed(f"{sites_root}/{mod}qtl/nano_merge_{mod}_sites.csv", f"{bed_dir}/{mod}.bed")

In [18]:
# Build a headerless BED6 table of gene intervals from the GENCODE GFF3 annotation.
# Only GFF3 columns 0,2,3,4,5,6,8 are read; column 5 (the GFF score field) is
# loaded under the label 'name' but is not written out.
gff_df = pd.read_csv("/mnt/hpc/home/xuxinran/REF/hg19/gencode.v47lift37.annotation.gff3.gz",usecols = [0,2,3,4,5,6,8],header=None, names=['chrom', 'type', 's', 'e', 'name', 'strand','info'], sep="\t", comment="#")
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the full annotation table.
gff_df = gff_df[gff_df["type"] == "gene"].copy()
# GFF3 coordinates are 1-based and inclusive; BED is 0-based and half-open,
# so the start shifts down by one while the end stays as is.
gff_df['s'] = gff_df['s'] - 1
gff_df['score'] = 0
# Pull the value of the gene_id=... attribute out of the GFF3 attributes column;
# raises IndexError if a gene row lacks the attribute.
gff_df['gene_id'] = gff_df['info'].apply(lambda x: x.split("gene_id=")[1].split(";")[0])
# The BED name column (4th field) carries the gene_id.
gff_df = gff_df.loc[:, ['chrom', 's', 'e', 'gene_id', 'score', 'strand']]
gff_df.to_csv("/mnt/hpc/home/xuxinran/DirectSeq/8_downsteam/GeneFunctionalPathways/gff.bed",header=False,sep = "\t", index=False)

```shell
# The BED and chrom.sizes files are tab-delimited, so the sort field separator
# must be a tab; with a comma separator the whole line is a single field and
# the numeric start-position key (-k2,2n) is empty.
TAB="$(printf '\t')"

# Gene intervals and genome file, ordered by chromosome (version sort) then start.
sort -t "$TAB" -k1,1V -k2,2n gff.bed > gff_sorted.bed
sort -t "$TAB" -k1,1V /mnt/hpc/home/xuxinran/REF/hg19/hg19.chrom.sizes > hg19.chrom.sizes.sorted

for mod in m6A m5C pseU I; do
    sort -t "$TAB" -k1,1V -k2,2n "${mod}.bed" > "${mod}_sorted.bed"

    # Strand-aware (-s) overlap of sites with genes; -wa -wb writes the 6 site
    # columns followed by the 6 gene columns, -g supplies the chromosome order
    # used by the -sorted algorithm.
    bedtools intersect -a "${mod}_sorted.bed" -b gff_sorted.bed -g hg19.chrom.sizes.sorted -wa -wb -s -sorted > "${mod}_gene.bed"

    # Column 10 is the gene name field (4th column of the gene BED). Strip
    # everything from the first '.' onward, then de-duplicate: IDs that differ
    # only in the stripped suffix collapse to the same ID, so uniqueness has to
    # be enforced after the sed step.
    cut -f10 "${mod}_gene.bed" | sed 's/\.[^ ]*//g' | sort -u > "${mod}_gene_ids.txt"
done

```

```R
library(org.Hs.eg.db)
library(clusterProfiler)
library(enrichplot)
library(ggplot2)

# Run GO enrichment for one modification type and write the result table.
#
# Reads "<mod>_gene_ids.txt" (one Ensembl gene ID per line, no header), maps the
# IDs to gene symbols with the human annotation database org.Hs.eg.db, runs
# enrichGO on the symbols and writes the result to "<mod>_enrich.go.txt"
# (tab-separated, no row names, unquoted).
#
# Returns the enrichGO result object, or NULL (with a warning) when enrichGO
# produced no result.
run_go_enrichment <- function(mod) {
    # Read the gene IDs from the gene list file
    gene_ids <- read.table(paste0(mod, "_gene_ids.txt"), header = FALSE, stringsAsFactors = FALSE)$V1
    # Convert Ensembl gene IDs to gene symbols
    gene_symbol <- bitr(gene_ids, fromType = "ENSEMBL", toType = "SYMBOL", OrgDb = org.Hs.eg.db)
    # GO enrichment analysis
    enrich_go <- enrichGO(gene = gene_symbol$SYMBOL,  # genes to test for enrichment
        OrgDb = 'org.Hs.eg.db',  # annotation database (human)
        keyType = 'SYMBOL',  # identifier type of the supplied genes
        ont = 'BP',  # GO ontology: BP, MF or CC, or ALL for all three
        pAdjustMethod = 'fdr',  # p-value adjustment method
        pvalueCutoff = 0.05,  # p-value threshold (set to 1 to report everything)
        qvalueCutoff = 0.2,  # q-value threshold (set to 1 to report everything)
        readable = FALSE)
    if (is.null(enrich_go)) {
        warning("GO enrichment returned no result for ", mod, "; no table written")
        return(NULL)
    }
    # Write the result table
    write.table(as.data.frame(enrich_go), paste0(mod, '_enrich.go.txt'), sep = '\t', row.names = FALSE, quote = FALSE)
    enrich_go
}

# Draw a tree plot of up to 50 enriched terms and save it as "<mod>_treeplot.pdf".
# Skipped with a warning when there is no enrichment result or no enriched term,
# so that one empty modification type does not abort the remaining plots.
save_treeplot <- function(enrich_go, mod) {
    if (is.null(enrich_go) || nrow(as.data.frame(enrich_go)) == 0) {
        warning("No enriched GO terms for ", mod, "; tree plot skipped")
        return(invisible(NULL))
    }
    term_sim <- pairwise_termsim(enrich_go)
    tree_plot <- treeplot(term_sim, showCategory = 50)
    ggsave(filename = paste0(mod, '_treeplot.pdf'), plot = tree_plot, width = 20, height = 15)
}

# Enrichment tables (result objects keep their original top-level names)
pseU_enrich.go <- run_go_enrichment("pseU")
m6A_enrich.go <- run_go_enrichment("m6A")
I_enrich.go <- run_go_enrichment("I")
m5C_enrich.go <- run_go_enrichment("m5C")

# Tree plots
save_treeplot(m6A_enrich.go, "m6A")
save_treeplot(m5C_enrich.go, "m5C")
save_treeplot(I_enrich.go, "I")
save_treeplot(pseU_enrich.go, "pseU")

```