In [1]:
import pandas as pd
import os
# Project root; expanduser resolves "~" to the current user's home directory.
path = os.path.expanduser("~/Documents/m6A/")
# Input table of intron m6AQTLs and the BED file derived from it.
# os.path.join is used because `path` already ends with a separator, so
# f"{path}/Data/..." would produce a doubled "/" in the resulting paths.
input_m6AQTL = os.path.join(path, "Data", "intron_m6AQTLs.txt")
output_m6AQTL = os.path.join(path, "Data", "intron_m6AQTLs.bed")
# Half-width of the window placed around each QTL position (pos - n .. pos + n).
n = 50

#### Handle m6AQTL data
1. Sort by chromosome and position (`chr`, `pos`).
2. Build a window around each position by subtracting and adding 50 (`pos - 50` to `pos + 50`).

In [2]:
# Load the m6AQTL table, order it by chromosome and position, and give it a
# clean 0..N-1 index so row labels match the sorted order.
m6AQTL = (
    pd.read_table(input_m6AQTL, header = 0)
    .sort_values(by = ["chr", "pos"])
    .reset_index(drop = True)
)

# peakID looks like "<gene_symbol>_<start>_<end>_<strand>" (e.g. "AGRN_981788_981988_+").
# Split it once, from the right, so that a gene symbol containing "_" cannot
# shift the start/end/strand fields. Columns of peak_fields: 0=gene, 1=start, 2=end, 3=strand.
peak_fields = m6AQTL["peakID"].str.rsplit("_", n = 3, expand = True)
peak_start = peak_fields[1].astype("int64")
peak_end = peak_fields[2].astype("int64")

# Columns are added in the same order as before so the frame layout is unchanged.
m6AQTL["strand"] = peak_fields[3]
m6AQTL["gene_symbol"] = peak_fields[0]
m6AQTL["start"] = peak_fields[1]  # kept as text, exactly as split from peakID
m6AQTL["end"] = peak_fields[2]    # kept as text, exactly as split from peakID

# Window of +/- n around each QTL position.
# NOTE(review): BED start coordinates are 0-based and end coordinates are exclusive;
# if `pos` is 1-based, [pos1, pos2) is not symmetric around `pos` — confirm this is intended.
# NOTE(review): pos1 becomes negative when pos < n; confirm no such rows exist.
m6AQTL["pos1"] = m6AQTL["pos"] - n
m6AQTL["pos2"] = m6AQTL["pos"] + n
m6AQTL["width"] = m6AQTL["pos2"] - m6AQTL["pos1"] + 1

# Peak length and the offset from the QTL position to the peak start.
m6AQTL["width2"] = peak_end - peak_start
m6AQTL["gap"] = peak_start - m6AQTL["pos"]

# Write a headerless, tab-separated 6-column file: chr, pos1, pos2, gene_symbol, width, strand.
bed_columns = ["chr", "pos1", "pos2", "gene_symbol", "width", "strand"]
m6AQTL[bed_columns].to_csv(output_m6AQTL, index = False, header = False, sep = "\t")

In [3]:
# Show the coordinate-related columns side by side to sanity-check the derived values.
preview_columns = [
    "chr", "pos", "start", "end", "pos1", "pos2",
    "gene_symbol", "width", "width2", "gap", "strand",
]
m6AQTL.loc[:, preview_columns]

Unnamed: 0,chr,pos,start,end,pos1,pos2,gene_symbol,width,width2,gap,strand
0,chr1,899937,981788,981988,899887,899987,AGRN,101,200,81851,+
1,chr1,899938,981788,981988,899888,899988,AGRN,101,200,81850,+
2,chr1,899942,981788,981988,899892,899992,AGRN,101,200,81846,+
3,chr1,943468,949466,949516,943418,943518,ISG15,101,50,5998,+
4,chr1,945612,949466,949516,945562,945662,ISG15,101,50,3854,+
5,chr1,945861,949466,949516,945811,945911,ISG15,101,50,3605,+
6,chr1,946127,949466,949516,946077,946177,ISG15,101,50,3339,+
7,chr1,946135,949466,949516,946085,946185,ISG15,101,50,3331,+
8,chr1,947034,949466,949516,946984,947084,ISG15,101,50,2432,+
9,chr1,947538,949466,949516,947488,947588,ISG15,101,50,1928,+


### Run `bedtools` in bash from the directory
`~/Documents/m6A/Data/metApeakFisher`
```
bedtools intersect -a ../intron_m6AQTLs.bed -b peaks.merged.bed -s > peak.merged.m6AQTL.bed
```

In [5]:
from collections import Counter

# Tally how many rows share each value of the "width2" column.
width2_counts = Counter(m6AQTL["width2"])
width2_counts

Counter({26: 2,
         50: 884,
         97: 1,
         98: 22,
         99: 505,
         100: 457,
         136: 1,
         138: 2,
         139: 5,
         146: 26,
         147: 6,
         148: 79,
         149: 264,
         150: 359,
         170: 9,
         196: 1,
         197: 76,
         198: 44,
         199: 567,
         200: 197,
         201: 1,
         221: 1,
         225: 1,
         233: 1,
         244: 1,
         246: 1,
         247: 46,
         248: 216,
         249: 307,
         250: 300,
         253: 1,
         262: 27,
         275: 4,
         282: 2,
         284: 18,
         286: 16,
         295: 22,
         296: 50,
         297: 17,
         298: 172,
         299: 371,
         300: 18,
         313: 201,
         325: 1,
         326: 4,
         329: 2,
         337: 1,
         340: 1,
         341: 3,
         343: 2,
         344: 7,
         345: 7,
         346: 27,
         347: 201,
         348: 9,
         349: 93,
         3

In [6]:
# Display the m6AQTL table with all of its columns (the loaded ones plus the derived ones).
m6AQTL

Unnamed: 0,chr,pos,peakID,snpID,dist,npvalue,beta,FDR,qvalue,strand,gene_symbol,start,end,pos1,pos2,width,width2,gap
0,chr1,899937,AGRN_981788_981988_+,rs143296006,-81852,1.257660e-04,0.209051,9.967526e-02,9.512829e-02,+,AGRN,981788,981988,899887,899987,101,200,81851
1,chr1,899938,AGRN_981788_981988_+,rs147467971,-81851,1.257660e-04,0.209051,9.967526e-02,9.512829e-02,+,AGRN,981788,981988,899888,899988,101,200,81850
2,chr1,899942,AGRN_981788_981988_+,rs71509448,-81847,4.651710e-05,0.256919,5.334776e-02,5.091415e-02,+,AGRN,981788,981988,899892,899992,101,200,81846
3,chr1,943468,ISG15_949466_949516_+,rs3121567,-5999,9.042620e-05,0.175897,8.195625e-02,7.821758e-02,+,ISG15,949466,949516,943418,943518,101,50,5998
4,chr1,945612,ISG15_949466_949516_+,rs3121565,-3855,3.104540e-05,0.193259,4.084279e-02,3.897963e-02,+,ISG15,949466,949516,945562,945662,101,50,3854
5,chr1,945861,ISG15_949466_949516_+,rs150242145,-3606,3.786060e-06,0.191278,9.499576e-03,9.066226e-03,+,ISG15,949466,949516,945811,945911,101,50,3605
6,chr1,946127,ISG15_949466_949516_+,rs34487852,-3340,3.924570e-05,0.186970,4.754982e-02,4.538070e-02,+,ISG15,949466,949516,946077,946177,101,50,3339
7,chr1,946135,ISG15_949466_949516_+,rs9723307,-3332,3.924570e-05,0.186970,4.754982e-02,4.538070e-02,+,ISG15,949466,949516,946085,946185,101,50,3331
8,chr1,947034,ISG15_949466_949516_+,rs2465126,-2433,2.757970e-06,0.195956,7.407468e-03,7.069555e-03,+,ISG15,949466,949516,946984,947084,101,50,2432
9,chr1,947538,ISG15_949466_949516_+,rs2465125,-1929,3.735910e-05,0.183229,4.595932e-02,4.386276e-02,+,ISG15,949466,949516,947488,947588,101,50,1928
