In [None]:
from cooltools import snipping
import cooler
import pandas as pd
from scipy.stats import ttest_ind
import numpy as np
import pybedtools
import bioframe
import seaborn as sns
import matplotlib.pyplot as plt
import glob
import os

In [None]:
# Global plotting style: large fonts for all figures in this notebook.
plt.rcParams.update({'font.size': 24})

# mm10 chromosome sizes, and one (chrom, 0, size) tuple per chromosome.
chromsizes=bioframe.fetch_chromsizes('mm10')
chromosomes = list(chromsizes.index)
supports = [(chrom, 0, chromsizes[chrom]) for chrom in chromosomes]

binsDf=pd.read_csv('/path/to/mm10bin5kb.bed',sep='\t',header=None,names=['chrom','start','end']) #bin file can be made with cooler makebins, binsize = 5000

# Build one wide table: the bins plus one column per binned signal file.
# The column name is the file's basename up to its first '.'.
binKeys=['chrom','start','end']
# glob order is filesystem-dependent; sorting makes the column order reproducible.
signalFiles=sorted(glob.glob('/path/to/binned/bedgraphs/*'))
signalDf=binsDf.copy()
for signalFile in signalFiles:
    signalName=os.path.basename(signalFile).split('.')[0]
    signalTrack=pd.read_csv(signalFile,sep="\t", header=None,names=binKeys+[signalName])
    signalDf=signalDf.merge(signalTrack,how='left',on=binKeys)
    # A signal file with repeated bins duplicates rows in the merge. Keep the first
    # occurrence AND renumber the rows: the rest of the notebook addresses signalDf
    # with row labels taken from binsDf, so the index must stay 0..len(binsDf)-1.
    # (Dropping rows without resetting leaves gaps that shift/break those lookups.)
    signalDf=signalDf.drop_duplicates(subset=binKeys).reset_index(drop=True)

# Ratio columns derived from the coverage tracks loaded above.
signalDf['zygo_interhomologRatio']=signalDf.zygonema_lucasHap_homo_cisCov/signalDf.zygo_cisCov
signalDf['pachy_interhomologRatio']=signalDf.pachynema_lucasHap_homo_cisCov/signalDf.pachy_cisCov



In [None]:
def splitQuantile(toSplit,splitOn,quantiles=4):
    """Partition index labels into equal-sized groups ranked by a signal.

    Parameters
    ----------
    toSplit : array-like of index labels
        Labels (into ``splitOn``) to be partitioned.
    splitOn : pandas.Series
        Values used for ranking; looked up as ``splitOn[toSplit]``.
    quantiles : int
        Number of groups to produce.

    Returns
    -------
    list of numpy.ndarray
        ``quantiles`` arrays of labels, ordered from lowest to highest
        ``splitOn`` value (element 0 = bottom group, element -1 = top group).
        Group boundaries are ``linspace(0, len(toSplit), quantiles+1)``
        truncated to integers. ``sort_values`` places NaN last by default,
        so labels with a missing value end up in the final group.
    """
    edges=np.linspace(0,len(toSplit),quantiles+1).astype(int)
    rankedLabels=splitOn[toSplit].sort_values().index.to_numpy()
    groups=[]
    for lower,upper in zip(edges[:-1],edges[1:]):
        groups.append(rankedLabels[lower:upper])
    return groups

def getPeakInds(peakFile,binsize=5000,flank=300000,supports=supports):
    """Map each peak in a tab-separated peak file to a row label of ``binsDf``.

    Parameters
    ----------
    peakFile : str
        Path to a headerless, tab-separated file read with the ten column
        names listed below (presumably ENCODE narrowPeak layout -- confirm).
    binsize : int
        Bin size handed to ``snipping.make_bin_aligned_windows``.
    flank : int
        ``flank_bp`` handed to ``snipping.make_bin_aligned_windows``.
    supports : list of (chrom, zero, chromsize) tuples
        Used only for the chromosome size, to discard windows that reach a
        chromosome end.

    Returns
    -------
    numpy.ndarray
        For every retained window, ``(startInd + endInd) // 2`` where
        ``startInd`` / ``endInd`` are the ``binsDf`` row labels whose
        ``start`` / ``end`` equal the window's ``start`` / ``end``.

    Notes
    -----
    Reads the module-level ``binsDf``.
    """
    peakColumns=['chrom','start','end','name','score','strand','signalValue','pvalue','qvalue','peak']
    peaks=pd.read_csv(peakFile,header=None,sep="\t",names=peakColumns)

    # Windows are centred on the integer midpoint of each peak.
    midpoints=(peaks['start'] + peaks['end'])//2
    windows = snipping.make_bin_aligned_windows(binsize, peaks['chrom'], midpoints, flank_bp=flank)

    # Attach chromosome sizes, then keep only windows strictly inside the chromosome.
    supportDf=pd.DataFrame(supports,columns=['chrom','zero','chromsize'])
    windows=windows.merge(supportDf,how='left',on='chrom')
    insideChrom=(windows['end']<windows['chromsize']) & (windows['start']>0)
    windows=windows[insideChrom].drop_duplicates()

    # Discard windows on chrX, chrY and chrM.
    windows=windows[~windows['chrom'].isin(['chrX','chrY','chrM'])]

    # Look up the bin whose start matches the window start, and the bin whose
    # end matches the window end; 'index' carries the original binsDf row label.
    labelledBins=binsDf.reset_index()
    startInds=labelledBins.merge(windows[['chrom','start']],how='inner',on=['chrom','start'])['index']
    endInds=labelledBins.merge(windows[['chrom','end']],how='inner',on=['chrom','end'])['index']
    return ((startInds+endInds)//2).values

def makeTracePlots(
    posListDict,
    signalLabelsDf,
    rowOrder=None,
    colOrder=None,
    palette=None,
    numBins=121,
    saveName=None,
    df=signalDf,
    avgType='median',
    aspectRatio=1.2,
    shareColAxes='stretch',
):
    """Placeholder: accepts the trace-plot arguments but does nothing and returns None."""
    return None
def makeHeatmap(posListDict, varDict, showSignificance=True, saveName=None, df=signalDf):
    """Plot a heatmap of log-enrichment of signals at sets of bin positions.

    Each cell is ``log(mean of df[var] at the positions / mean of df[var] over
    all rows)`` (natural log; pandas ``mean`` skips NaN).

    Parameters
    ----------
    posListDict : dict
        Maps a row label to an array of index labels into ``df``. Insertion
        order sets the row order of the heatmap.
    varDict : dict
        Maps a column name of ``df`` to the label shown on the x axis.
        Insertion order sets the column order.
    showSignificance : bool
        If true, each x tick label gets ' (*)' or ' (n.s.)' appended. The
        marker comes from an independent two-sample t-test between the THIRD
        and the SECOND entry of ``posListDict``, with the p-value multiplied
        by the number of variables and compared against 0.01.
    saveName : str or None
        If given, the figure is written there with ``plt.savefig``.
    df : pandas.DataFrame
        Table holding the signal columns.

    Raises
    ------
    ValueError
        If ``showSignificance`` is true but ``posListDict`` has fewer than
        three entries (the test needs the second and third).
    """
    numVars=len(varDict)
    posListKeys=list(posListDict)
    if showSignificance and len(posListKeys)<3:
        raise ValueError('showSignificance=True compares the 3rd and 2nd entries of posListDict, '
                         'but only %d were given' % len(posListKeys))

    heatmapArray=np.zeros((len(posListKeys), numVars))
    pValArray=np.ones(numVars)
    for varInd,var in enumerate(varDict):
        signal=df[var]
        genomeMean=signal.mean()  # loop-invariant across position lists
        scores=[]
        for posListInd,posListKey in enumerate(posListKeys):
            atSites=signal[posListDict[posListKey]]
            heatmapArray[posListInd,varInd]=np.log(atSites.mean()/genomeMean)
            # The means above skip NaN; drop NaN here too, otherwise a single
            # missing value makes ttest_ind return NaN (reported as 'n.s.').
            scores.append(atSites.dropna().values)
        # Only run the test when its result is displayed, so heatmaps without
        # significance markers work for any number of position lists.
        if showSignificance:
            pValArray[varInd]=ttest_ind(scores[2],scores[1])[1]*numVars

    heatMapDf=pd.DataFrame(heatmapArray,columns=[varDict[varKey] for varKey in varDict], index=posListKeys)
    plt.figure(figsize=(1.5*numVars,1.5*len(posListKeys)))
    # Symmetric colour scale with 10% headroom beyond the largest |value|.
    absVmax=1.1*np.max(np.abs(heatMapDf.values))
    ax=sns.heatmap(heatMapDf, cmap='seismic', center=0, vmin=-absVmax, vmax = absVmax, square=True, annot=True, cbar_kws={'fraction':0.04, 'label':'Log-enrichment'})
    # Black separator lines at fixed positions (y = 0, 1 and x = 1, 2, 3).
    ax.hlines([0,1], *ax.get_xlim(), color='k')
    ax.vlines([1,2,3], *ax.get_ylim(), color='k')
    if showSignificance:
        starList=['*' if (pVal<0.01) else 'n.s.' for pVal in pValArray]
        newTickLabels=[tickLabel.get_text()+' ('+starList[ind]+')' for ind,tickLabel in enumerate(ax.get_xticklabels())]
        ax.set_xticklabels(newTickLabels)
    plt.yticks(rotation=0)
    plt.xticks(rotation=90)
    if saveName is not None:
        plt.savefig(saveName)


In [None]:
# Every peak file in the directory is converted to an array of bin indices,
# keyed by the file's basename up to its first '.'.
peakFiles=glob.glob('/path/to/peak/bed/files/*')
peakFile=peakFiles[0]
peakIndsDict={
    os.path.basename(path).split('.')[0]: inds
    for path,inds in zip(peakFiles,[getPeakInds(path) for path in peakFiles])
}
# Directory that receives the saved figures.
saveDir='/path/to/save/outputs/'

In [None]:
# signalDf column name -> axis label, for the seven binned chromHMM states.
chromhmmVarDict={
    'chromHMMstate_1_binned':'H3K4me3',
    'chromHMMstate_2_binned':'H3K4me1/3',
    'chromHMMstate_3_binned':'H3K4me1',
    'chromHMMstate_4_binned':'H3K4me1+\nH3K36me3',
    'chromHMMstate_5_binned':'H3K36me3',
    'chromHMMstate_6_binned':'Unmarked',
    'chromHMMstate_7_binned':'H3K27me3',
}

# signalDf column name -> axis label, for the recombination-related tracks.
recombVarDict={
    'Baker2015_prdm9_B6xCAST':'PRDM9 ChIP-seq',
    'smagulova2016_B6xCAST_DSB_fraglen1000':'DMC1-SSDS\nChIP-seq (DSB)',
    'yin2019_crossoversCast1C_binned_weightedScore_medianNormalized':'Crossover score',
}

In [None]:
# Each site set below is split into four groups by a signal (splitQuantile default);
# the dicts keep all sites, the highest group ([-1]) and the lowest group ([0]).

# Grey PRDM9-CAST class 1 sites, ranked by the DSB signal.
# NOTE: greyPrdm9castPeakListDict is not used by the heatmap calls in the next
# cell and is reassigned (with different labels) in a later cell.
greyPrdm9castSplitonDMC=splitQuantile(
    peakIndsDict['grey2017orig_PRDM9_RJ2_class1_peaks'],
    signalDf['smagulova2016_B6xCAST_DSB_fraglen1000'])
greyPrdm9castPeakListDict={
    'Grey PRDM9CAST sites (all)':peakIndsDict['grey2017orig_PRDM9_RJ2_class1_peaks'],
    'Grey PRDM9CAST sites (top-DSB)':greyPrdm9castSplitonDMC[-1],
    'Grey PRDM9CAST sites (bottom-DSB)':greyPrdm9castSplitonDMC[0],
}

# Baker 2015 PRDM9 sites, ranked by the DSB signal.
prdm9SplitonDMC=splitQuantile(
    peakIndsDict['Baker2015_prdm9_B6xCAST_peaks'],
    signalDf['smagulova2016_B6xCAST_DSB_fraglen1000'])
prdm9PeakListDict={
    'PRDM9 sites\n(all)':peakIndsDict['Baker2015_prdm9_B6xCAST_peaks'],
    'PRDM9 sites\n(top-DSB)':prdm9SplitonDMC[-1],
    'PRDM9 sites\n(bottom-DSB)':prdm9SplitonDMC[0],
}

# DSB sites, ranked by the crossover score.
DMCSplitonCO=splitQuantile(
    peakIndsDict['smagulova2016_B6xCAST_DSB_fraglen1000_peaks'],
    signalDf['yin2019_crossoversCast1C_binned_weightedScore_medianNormalized'])
DSBPeakListDict={
    'DSB sites\n(all)':peakIndsDict['smagulova2016_B6xCAST_DSB_fraglen1000_peaks'],
    'DSB sites\n(top-CO)':DMCSplitonCO[-1],
    'DSB sites\n(bottom-CO)':DMCSplitonCO[0],
}

# Baker 2015 PRDM9 sites, ranked by the crossover score.
prdm9SplitonCO=splitQuantile(
    peakIndsDict['Baker2015_prdm9_B6xCAST_peaks'],
    signalDf['yin2019_crossoversCast1C_binned_weightedScore_medianNormalized'])
prdm9SplitCOPeakListDict={
    'PRDM9 sites\n(all)':peakIndsDict['Baker2015_prdm9_B6xCAST_peaks'],
    'PRDM9 sites\n(top-CO)':prdm9SplitonCO[-1],
    'PRDM9 sites\n(bottom-CO)':prdm9SplitonCO[0],
}

In [None]:
# (site sets, signal labels, output file name) for each heatmap, in plotting order.
heatmapJobs=[
    (DSBPeakListDict, chromhmmVarDict, '/dsb_chromhmm_heatmap.pdf'),
    (DSBPeakListDict, recombVarDict, '/dsb_recomb_heatmap.pdf'),
    (prdm9PeakListDict, chromhmmVarDict, '/prdm9_chromhmm_heatmap.pdf'),
    (prdm9PeakListDict, recombVarDict, '/prdm9_recomb_heatmap.pdf'),
    (prdm9SplitCOPeakListDict, chromhmmVarDict, '/prdm9splitCO_chromhmm_heatmap.pdf'),
    (prdm9SplitCOPeakListDict, recombVarDict, '/prdm9splitCO_recomb_heatmap.pdf'),
]
for jobPosLists,jobVars,jobFile in heatmapJobs:
    makeHeatmap(posListDict=jobPosLists, varDict=jobVars, saveName=saveDir+jobFile)

In [None]:
# Grey PRDM9-CAST class 1 sites: all, plus the highest ([-1]) and lowest ([0])
# of four groups ranked by the DSB signal.
greyPrdm9castSplitonDMC=splitQuantile(
    peakIndsDict['grey2017orig_PRDM9_RJ2_class1_peaks'],
    signalDf['smagulova2016_B6xCAST_DSB_fraglen1000'])
greyPrdm9castPeakListDict={
    'Grey PRDM9-CAST sites\n(Class 1 - all)':peakIndsDict['grey2017orig_PRDM9_RJ2_class1_peaks'],
    'Grey PRDM9-CAST sites\n(top-DSB)':greyPrdm9castSplitonDMC[-1],
    'Grey PRDM9-CAST sites\n(bottom-DSB)':greyPrdm9castSplitonDMC[0],
}

# Same construction for the Grey PRDM9-B6 class 1 sites.
greyPrdm9b6SplitonDMC=splitQuantile(
    peakIndsDict['grey2017orig_PRDM9_B6_class1_peaks'],
    signalDf['smagulova2016_B6xCAST_DSB_fraglen1000'])
greyPrdm9b6PeakListDict={
    'Grey PRDM9-B6 sites\n(Class 1 - all)':peakIndsDict['grey2017orig_PRDM9_B6_class1_peaks'],
    'Grey PRDM9-B6 sites\n(top-DSB)':greyPrdm9b6SplitonDMC[-1],
    'Grey PRDM9-B6 sites\n(bottom-DSB)':greyPrdm9b6SplitonDMC[0],
}

# All four Grey site classes side by side (no ranking).
greyPrdm9allPeakListDict={
    'Grey PRDM9-B6 sites\n(Class 1)':peakIndsDict['grey2017orig_PRDM9_B6_class1_peaks'],
    'Grey PRDM9-B6 sites\n(Class 2)':peakIndsDict['grey2017orig_PRDM9_B6_class2_peaks'],
    'Grey PRDM9-CAST sites\n(Class 1)':peakIndsDict['grey2017orig_PRDM9_RJ2_class1_peaks'],
    'Grey PRDM9-CAST sites\n(Class 2)':peakIndsDict['grey2017orig_PRDM9_RJ2_class2_peaks'],
}

# Cohesin peak sets.
cohesinPeakListDict={
    'Meiotic RAD21L sites':peakIndsDict['vara2019_RSrad21l_peaks'],
    'Meiotic REC8 sites':peakIndsDict['vara2019_RSrec8_peaks'],
    'ES RAD21 sites':peakIndsDict['Nitzsche2011_ESC_RAD21_peaks'],
}

# (site sets, signal labels, show significance markers?, output file name), in plotting order.
# Significance markers are only requested for the all/top/bottom dictionaries.
greyHeatmapJobs=[
    (greyPrdm9castPeakListDict, chromhmmVarDict, True, '/greyprdm9cast_chromhmm_heatmap.pdf'),
    (greyPrdm9castPeakListDict, recombVarDict, True, '/greyprdm9cast_recomb_heatmap.pdf'),
    (greyPrdm9b6PeakListDict, chromhmmVarDict, True, '/greyprdm9b6_chromhmm_heatmap.pdf'),
    (greyPrdm9b6PeakListDict, recombVarDict, True, '/greyprdm9b6_recomb_heatmap.pdf'),
    (greyPrdm9allPeakListDict, chromhmmVarDict, False, '/greyprdm9all_chromhmm_heatmap.pdf'),
    (greyPrdm9allPeakListDict, recombVarDict, False, '/greyprdm9all_recomb_heatmap.pdf'),
    (cohesinPeakListDict, chromhmmVarDict, False, '/cohesins_chromhmm_heatmap.pdf'),
]
for jobPosLists,jobVars,jobShowSig,jobFile in greyHeatmapJobs:
    makeHeatmap(posListDict=jobPosLists, varDict=jobVars, showSignificance=jobShowSig, saveName=saveDir+jobFile)
