In [None]:
from matplotlib_venn import venn3, venn3_circles
import numpy as np
import pandas as pd
import bioframe
import glob
import os
from cooltools import snipping
import cooler
import matplotlib.pyplot as plt
# Larger default font for every figure drawn below.
plt.rcParams['font.size'] = 16

# Genome bin table; the mm10bin5kb.bed file was made using cooler makebins.
binsDf = pd.read_csv(
    '/path/to/mm10bin5kb.bed',
    sep='\t',
    header=None,
    names=['chrom', 'start', 'end'],
)

# mm10 chromosome sizes from bioframe, plus one (chrom, 0, size) tuple per chromosome.
chromsizes = bioframe.fetch_chromsizes('mm10')
chromosomes = chromsizes.index.tolist()
supports = [(name, 0, chromsizes[name]) for name in chromosomes]

# Prefix used by the plotting cells when building figure file names.
saveDir = '/path/to/outputs/'

In [None]:
def makeVenn3(setList, setLabels, colourList=None, alpha=0.5, saveName=None):
    """Draw a three-set Venn diagram with ``venn3`` plus ``venn3_circles``.

    Parameters
    ----------
    setList : sequence of three sets
        Passed unchanged to both ``venn3`` and ``venn3_circles``.
    setLabels : sequence of three str
        Passed to ``venn3`` as ``set_labels``.
    colourList : sequence of three colours, optional
        When given, every drawn region patch is set to white and the i-th
        circle is given ``colourList[i]``. When ``None`` no colours are
        changed.
    alpha : float
        Alpha value applied to each of the three circles.
    saveName : str, optional
        When not ``None`` the figure is written with ``plt.savefig(saveName)``.

    Returns
    -------
    None
    """
    v = venn3(setList, set_labels=setLabels)
    c = venn3_circles(setList)

    if colourList is not None:
        # Region ids are the 3-bit strings '001' ... '111'.
        for patchInd in range(1, 8):
            patch = v.get_patch_by_id(np.binary_repr(patchInd, width=3))
            # matplotlib-venn keeps None for regions it does not draw; skip
            # those explicitly so that genuine errors are no longer hidden.
            if patch is not None:
                patch.set_color('white')

    for groupInd in range(3):
        if colourList is not None:
            c[groupInd].set_color(colourList[groupInd])
        c[groupInd].set_alpha(alpha)

    if saveName is not None:
        plt.savefig(saveName)
        
def getPeakInds(peakFile,binsize=5000,flank=300000,supports=supports,binsDf=binsDf):
    """Return one bin index per retained peak window in ``peakFile``.

    Parameters
    ----------
    peakFile : str
        Tab-separated, header-less file read with ten columns (chrom, start,
        end, name, score, strand, signalValue, pvalue, qvalue, peak).
        NOTE(review): this looks like the narrowPeak layout - confirm.
    binsize : int
        Bin size handed to ``snipping.make_bin_aligned_windows``.
    flank : int
        Passed to ``snipping.make_bin_aligned_windows`` as ``flank_bp``.
    supports : list of (chrom, zero, chromsize) tuples
        Used to look up the size of each window's chromosome.
    binsDf : DataFrame with ``chrom``, ``start`` and ``end`` columns
        Bin table whose index labels are what is returned.

    Returns
    -------
    numpy.ndarray
        ``(startInd + endInd) // 2`` for every retained window, where
        ``startInd`` / ``endInd`` are the ``binsDf`` index labels of the bins
        whose start / end equal the window's start / end.
    """
    columnNames = ['chrom','start','end','name','score','strand','signalValue','pvalue','qvalue','peak']
    peaks = pd.read_csv(peakFile, header=None, sep="\t", names=columnNames)

    # One window per peak, built around the integer midpoint of the peak.
    midpoints = (peaks['start'] + peaks['end']) // 2
    windows = snipping.make_bin_aligned_windows(
        binsize, peaks['chrom'], midpoints, flank_bp=flank)

    # Attach the chromosome size so that windows can be checked against it.
    supportDf = pd.DataFrame(supports, columns=['chrom','zero','chromsize'])
    windows = windows.merge(supportDf, how='left', on='chrom')

    # Keep windows that start after position 0 and end before the chromosome
    # end, then remove repeated rows.
    insideChrom = (windows.end < windows.chromsize) & (windows.start > 0)
    windows = windows[insideChrom].drop_duplicates()

    # Drop chrX, chrY and chrM.
    windows = windows.loc[~windows.chrom.isin(['chrX', 'chrY', 'chrM']), :]

    # Look up the bin whose start matches each window start and the bin whose
    # end matches each window end; the two results are paired by position.
    indexedBins = binsDf.reset_index()
    startInds = indexedBins.merge(
        windows[['chrom','start']], how='inner', on=['chrom','start'])['index']
    endInds = indexedBins.merge(
        windows[['chrom','end']], how='inner', on=['chrom','end'])['index']

    return ((startInds + endInds) // 2).values

In [None]:
# Run getPeakInds on every path matched in the peak directory, keyed by the
# part of the file name that precedes the first '.'.
peakFiles = glob.glob('/path/to/peak/bedfiles/*')
peakIndsDict = {
    os.path.basename(path).split('.')[0]: getPeakInds(path)
    for path in peakFiles
}

In [None]:
# Overlap of the Baker 2015 PRDM9 bins with the two Grey 2017 class 1 sets.
cats = [
    'Baker2015_prdm9_B6xCAST_peaks',
    'grey2017orig_PRDM9_RJ2_class1_peaks',
    'grey2017orig_PRDM9_B6_class1_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=[
        'PRDM9 sites\n(Baker 2015)',
        'PRDM9-CAST\nClass1 sites\n(Grey 2017)',
        'PRDM9-B6\nClass1 sites\n(Grey 2017)',
    ],
    alpha=0.5,
    saveName=f'{saveDir}/vennBakervsGreyC1.pdf',
)

In [None]:
# Overlap of the Baker 2015 PRDM9 bins with the two Grey 2017 class 2 sets.
cats = [
    'Baker2015_prdm9_B6xCAST_peaks',
    'grey2017orig_PRDM9_RJ2_class2_peaks',
    'grey2017orig_PRDM9_B6_class2_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=[
        'PRDM9 sites\n(Baker 2015)',
        'PRDM9-CAST\nClass2 sites\n(Grey 2017)',
        'PRDM9-B6\nClass2 sites\n(Grey 2017)',
    ],
    alpha=0.5,
    saveName=f'{saveDir}/vennBakervsGreyC2.pdf',
)

In [None]:
# Exploratory comparison: labelled with the raw dataset keys and only
# displayed (saveName is None, so no file is written).
cats = [
    'Baker2015_prdm9_B6xCAST_peaks',
    'smagulova2016_B6xCAST_DSB_fraglen1000_peaks',
    'grey2017orig_PRDM9_RJ2_class2_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=cats,
    alpha=0.5,
    saveName=None,
)

In [None]:
# Overlap of the meiotic CTCF, RNAPII and RAD21L bin sets.
cats = [
    'vara2019_PDctcf_peaks',
    'margolin2014RNAPII16dpp_peaks',
    'vara2019_PDrad21l_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=[
        'Meiotic\nCTCF sites\n(Vara 2019)',
        'Meiotic\nRNAPII sites\n(Margolin 2014)',
        'Meiotic\nRAD21L sites\n(Vara 2019)',
    ],
    alpha=0.5,
    saveName=f'{saveDir}/vennMeioticCCR.pdf',
)

In [None]:
# Overlap of the ES RAD21 bins with the meiotic REC8 and RAD21L bin sets.
cats = [
    'Nitzsche2011_ESC_RAD21_peaks',
    'vara2019_PDrec8_peaks',
    'vara2019_PDrad21l_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=[
        'ES RAD21 sites\n(Nitzsche 2011)',
        'Meiotic\nREC8 sites\n(Vara 2019)',
        'Meiotic\nRAD21L sites\n(Vara 2019)',
    ],
    alpha=0.5,
    saveName=f'{saveDir}/vennCohesins.pdf',
)

In [None]:
# Overlap of the ES RAD21, CTCF and RNAPII bin sets.
cats = [
    'Nitzsche2011_ESC_RAD21_peaks',
    'Nitzsche2011_ESC_CTCF_peaks',
    'ES_RNAP2_peaks',
]
makeVenn3(
    setList=[set(peakIndsDict[key]) for key in cats],
    setLabels=[
        'ES RAD21 sites\n(Nitzsche 2011)',
        'ES CTCF sites\n(Nitzsche 2011)',
        'ES RNAPII sites\n(ENCODE)',
    ],
    alpha=0.5,
    saveName=f'{saveDir}/vennESCCR.pdf',
)