In [2]:
import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pyranges as pr
import glob as glob
import os
from importlib import reload
%matplotlib inline
import sys
sys.path.append("modules")
import bedgraph_tools as bgt
import peak_tools as pkt
from collections import defaultdict
import pyBigWig as pw
from matplotlib_venn import venn2
import pysam
from itertools import groupby
from Bio import motifs
import logomaker
from Bio import Align
from Bio.Seq import Seq

In [4]:
# Per-replicate metadata table. The code below relies on its 'Sample' column and,
# at the very end, on an 'Exp' column.
metadat_nmumg = pd.read_parquet('achi_nmumg_metadat_20252601.parquet')

# Averaged bigWig per sample: one-column ('BW') frame whose row label is the file
# name up to the first '_a'.
bigwigs = glob.glob('/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/analysis/bigwigs/*.bw')
bw_df = pd.DataFrame(dict(zip([x.split('/')[-1].split('_a')[0] for x in bigwigs], bigwigs)), index=['BW']).T
# Point the three WT entries at the files under wt_no_lig_bws/ (the row is created
# if the glob above did not produce it).
bw_df.loc['hGR_WT', 'BW'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_bws/hGR_WT_averaged.bw'
bw_df.loc['hPR_WT', 'BW'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_bws/hPR_WT_averaged.bw'
bw_df.loc['mAR_WT', 'BW'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_bws/mAR_WT_averaged.bw'
# Column-wise concat aligns both frames on the sample label. Only the 'BW' column
# is kept afterwards, so the groupby/count frame contributes its row labels but
# none of its values. The 'hGR_dLBD_dN350' row is removed (KeyError if absent).
bw_df = pd.concat([metadat_nmumg.groupby('Sample').count(), bw_df], axis=1).drop('hGR_dLBD_dN350').loc[:, 'BW']


# Look up the averaged bigWig of every row via its 'Sample' label; the result is
# assigned positionally (.values), one path per row of metadat_nmumg.
# NOTE(review): a row whose Sample is 'hGR_dLBD_dN350' would fail this lookup
# because that label was dropped above - presumably no such row exists; confirm.
metadat_nmumg.loc[:, 'AverageBw'] = bw_df.loc[metadat_nmumg.reset_index().set_index('Sample').index].values.T

# Same procedure for the combined peak files; here the row label is the file name
# up to the first '_opt'.
comb_peaks = glob.glob('/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/analysis/peaks/*_optimal.bed')

peaks_df = pd.DataFrame(dict(zip([x.split('/')[-1].split('_opt')[0] for x in comb_peaks], comb_peaks)), index=['Peak']).T
peaks_df.loc['hGR_WT', 'Peak'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_peaks/hGR_WT_optimal.bed'
peaks_df.loc['hPR_WT', 'Peak'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_peaks/hPR_WT_optimal.bed'
peaks_df.loc['mAR_WT', 'Peak'] = '/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/data/wt_no_lig_peaks/mAR_WT_optimal.bed'

# As for the bigWigs: align on the sample label and keep only the 'Peak' column.
peaks_df = pd.concat([metadat_nmumg.groupby('Sample').count(), peaks_df], axis=1).loc[:, 'Peak']


metadat_nmumg.loc[:, 'AveragePeaks'] = peaks_df.loc[metadat_nmumg.reset_index().set_index('Sample').index].values.T
# Row labels of the seven mAR_dLBD replicates that are removed from the table.
drops = ['NMuMG_mAR_dLBD_1_S25', 'NMuMG_mAR_dLBD_2_S26', 'NMuMG_mAR_dLBD_3_S27',
       'NMuMG_mAR_dLBD_4_S28', 'NmuMG_mAR_dLBD_1_S4', 'NmuMG_mAR_dLBD_2_S5',
       'NmuMG_mAR_dLBD_3_S6']

metadat_nmumg = metadat_nmumg.drop(drops)
# Append the rows of the fig1 QC metadata whose row label matches 'SMAD1', then
# drop the 'Exp' column from the combined table.
smads = pd.read_parquet('data/fig1_qc_metadata.parquet')
metadat_nmumg = pd.concat([metadat_nmumg, smads.filter(regex='SMAD1', axis=0)], axis=0).drop('Exp', axis=1)

In [5]:
# Per-replicate files of the 20250116_NMuMG_trcs_3 run: one sorted list per file type.
# BAM paths containing 'tmp' are excluded.
bams1 = sorted(
    bam_path
    for bam_path in glob.glob('/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/mapped/*.bam')
    if 'tmp' not in bam_path
)
peaks1, raw1, norm1, fstack1, bw1 = (
    sorted(glob.glob(pattern))
    for pattern in (
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/*.clean.narrowPeak',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/outfiles/*.bedgraph',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/results/bedgraphs/*.bedgraph.gz',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/results/sumproms/*.sumprom.gz',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/*.bam.bw',
    )
)

def rename(l):
    """Return, for each path in ``l``, its file name cut at the first '.'.

    The file name is whatever follows the last '/' in the path. The result is a
    new list with one entry per input path, in the same order.
    """
    stems = []
    for path in l:
        file_name = path.rpartition('/')[2]
        stems.append(file_name.partition('.')[0])
    return stems

# One row per replicate, one column per file type. The six lists are matched purely
# by position, and the row labels are taken from the peak file names.
df1 = pd.DataFrame(
    [bams1, peaks1, raw1, norm1, fstack1, bw1],
    columns=rename(peaks1),
    index=['bamReads', 'Peaks', 'RawBG', 'NormBG', 'Annots', 'BigWig'],
).T
# Sample label = replicate label without its last two '_'-separated fields,
# e.g. 'NMuMG_hGR_dLBD_dN300_1_S103' -> 'NMuMG_hGR_dLBD_dN300'.
# rsplit gives the same result as cutting a fixed two characters whenever the
# second-to-last field is a single character, and stays correct when it is longer
# (replicate 10 and above).
df1.loc[:, 'Sample'] = [label.rsplit('_', 2)[0] for label in df1.index]
# NOTE(review): the last row is discarded by position, not by label - confirm which
# replicate this is meant to remove.
df1 = df1.iloc[:-1,:]

In [6]:
# Averaged peak file per sample: row label = file name with '_optimal' removed.
average_peaks_new = glob.glob('/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/averaging_missing_peaks/*optimal.bed')
av_dfnew = pd.DataFrame(average_peaks_new, index=rename(average_peaks_new))
av_dfnew.index = av_dfnew.index.str.replace('_optimal', '')
# Expand to one row per replicate of df1 by looking up each replicate's 'Sample' label.
av_dfnew = av_dfnew.loc[df1.set_index('Sample').index]
av_dfnew.columns = ['AveragePeaks']
# Positional assignment: row i of av_dfnew belongs to row i of df1.
df1['AveragePeaks'] = av_dfnew['AveragePeaks'].to_numpy()

# Averaged bigWig per sample, handled the same way ('_averaged' removed from the name).
average_bws_new = glob.glob('/home/labs/barkailab/vovam/Mammalian/Mammal_AchInbalOvaJd/averaging_missing_bws/*_averaged.bw')
av_dfnew = pd.DataFrame(average_bws_new, index=rename(average_bws_new))
av_dfnew.index = av_dfnew.index.str.replace('_averaged', '')
av_dfnew = av_dfnew.loc[df1.set_index('Sample').index]
av_dfnew.columns = ['AverageBw']
df1['AverageBw'] = av_dfnew['AverageBw'].to_numpy()

In [7]:
# Read-1 bedgraph files from the three data directories, each list sorted by path.
r1_1, r1_2, r1_3 = (
    sorted(glob.glob(pattern))
    for pattern in (
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/outfiles_read1/*.bedgraph',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles_read1/*.bedgraph',
        '/home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/SMAD1_files/outfiles_read1/*.bedgraph',
    )
)
# Single-column table of all read-1 paths, labelled by file name (cut at the first '.').
r1_df = pd.DataFrame(
    [*r1_1, *r1_2, *r1_3],
    index=rename([*r1_1, *r1_2, *r1_3]),
)
r1_df.columns = ['Read1BG']

In [8]:
# Append the replicates from df1 (all except 'NMuMG_hGR_dLBD_dN400_3_S109') as extra rows.
# NOTE(review): metadat_nmumg is rebuilt from itself here, so running this cell a
# second time without re-running the cells above appends the same rows again.
metadat_nmumg = pd.concat([metadat_nmumg, df1.drop('NMuMG_hGR_dLBD_dN400_3_S109')], axis=0)
# Add the columns of r1_df: every row label of metadat_nmumg is looked up in r1_df
# with .loc before the column-wise concat.
metadat_nmumg = pd.concat([metadat_nmumg, r1_df.loc[metadat_nmumg.index]], axis=1)

In [9]:
# (substring looked for in the first field, replacement for that field); first match wins.
_RECEPTOR_PREFIXES = (('AR', 'mAR'), ('GR', 'hGR'), ('PR', 'hPR'))


def _normalise_label(label):
    """Strip the 'NMuMG_'/'NmuMG_' tag and normalise the first '_'-separated field.

    A first field containing 'AR', 'GR' or 'PR' (checked in that order) is replaced
    by 'mAR', 'hGR' or 'hPR' respectively; every other field is kept unchanged.
    """
    stripped = label.replace('NMuMG_', '').replace('NmuMG_', '')
    first, separator, remainder = stripped.partition('_')
    for needle, replacement in _RECEPTOR_PREFIXES:
        if needle in first:
            first = replacement
            break
    return first + separator + remainder


# Normalised row labels, in the same order as metadat_nmumg.index.
nind = [_normalise_label(label) for label in metadat_nmumg.index]

In [10]:
# Replace the row labels with the normalised ones in nind (built by iterating over
# metadat_nmumg.index, so there is one label per row in the same order).
metadat_nmumg.index = nind

In [13]:
# Display only the rows whose label matches the regex 'hPR'.
metadat_nmumg.filter(regex='hPR', axis=0)

Unnamed: 0,bamReads,Peaks,RawBG,NormBG,Annots,BigWig,Sample,AverageBw,AveragePeaks,Read1BG
hPR_DBD_1_S52,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_PR_DBD,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_DBD_2_S53,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_PR_DBD,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_DBD_3_S54,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_PR_DBD,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN100_4_S4,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN100,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN300_1_S5,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN300,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN300_2_S6,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN300,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN300_3_S7,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN300,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN300_4_S8,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN300,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN450_1_S9,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN450,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
hPR_dLBD_dN450_2_S10,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_hPR_dLBD_dN450,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...


In [59]:
# Per-sample output files under individual_data/, one list per file type
# (order within each list is whatever glob returns).
new_data1, new_data2, new_data3, new_data4, new_data5 = (
    glob.glob(pattern)
    for pattern in (
        'individual_data/bad_regs/*.csv',
        'individual_data/bins_dat/*_bins.csv',
        'individual_data/clean_bg/*clean.parquet',
        'individual_data/clean_bg_read1/*read1_clean.parquet',
        'individual_data/distances/*dist.parquet',
    )
)

In [75]:
# Per-sample table of the new output files: one row per sample, one column per file type.
# The lists are matched purely by position, but glob.glob() does not guarantee any
# ordering, so each list is sorted first to make position i refer to the same sample
# in every list.
# NOTE(review): this assumes the file names of all five types sort in the same
# sample order despite their different suffixes - confirm for the naming scheme used.
_new_files_by_kind = {
    'Bad_Regs': sorted(new_data1),
    'BinsCalc': sorted(new_data2),
    'CleanNonNormBG': sorted(new_data3),
    'CleanNonNormR1BG': sorted(new_data4),
    'BRDistances': sorted(new_data5),
}
# A missing file would silently shift or pad the positional pairing; fail loudly instead.
_files_per_kind = {kind: len(paths) for kind, paths in _new_files_by_kind.items()}
if len(set(_files_per_kind.values())) > 1:
    raise ValueError(
        'Cannot pair output files by position, the number of files differs '
        'between file types: {}'.format(_files_per_kind)
    )
# Row labels come from the bad-region file names (cut at the first '.').
ndf = pd.DataFrame(
    list(_new_files_by_kind.values()),
    index=list(_new_files_by_kind),
    columns=rename(_new_files_by_kind['Bad_Regs']),
).T

In [77]:
# Put the columns of ndf next to the metadata (column-wise concat, rows matched by
# label) and write the combined table to 'new_metadata.parquet'.
pd.concat([metadat_nmumg, ndf], axis=1).to_parquet('new_metadata.parquet')

In [78]:
# Display the metadata table (note: this is metadat_nmumg itself, without the ndf
# columns that were only added to the frame written to parquet).
metadat_nmumg

Unnamed: 0,bamReads,Peaks,RawBG,NormBG,Annots,BigWig,Sample,AverageBw,AveragePeaks,Read1BG
mAR_dLBD_dN100_1_S46,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_AR_dLBD_dN100,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN100_2_S47,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_AR_dLBD_dN100,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN100_3_S48,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_AR_dLBD_dN100,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN150_1_S82,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_AR_dLBD_dN150,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN150_2_S83,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_AR_dLBD_dN150,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
...,...,...,...,...,...,...,...,...,...,...
mAR_dLBD_dN400_3_S90,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_mAR_dLBD_dN400,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN450_1_S91,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_mAR_dLBD_dN450,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN450_2_S92,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_mAR_dLBD_dN450,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...
mAR_dLBD_dN450_3_S93,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...,NMuMG_mAR_dLBD_dN450,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/Mammalian/Mammal_Ac...,/home/labs/barkailab/vovam/../LAB/data/SEQ/Ach...


In [17]:
# Path of the BED file with masked regions; it is also passed as the {mask} argument
# of the proc_files.py commands printed further down.
masked_region = '/home/labs/barkailab/vovam/Mammalian/mm10_annot/masked.bed'
# Same file loaded with pyranges.
mask = pr.read_bed(masked_region)

  return {k: v for k, v in df.groupby(grpby_key)}


In [18]:
def load_bedgraph(path):
    """Read a header-less, tab-separated bedgraph file with polars.

    The columns are named "Chromosome", "Start", "End" and "Value"; the result of
    ``pl.read_csv`` is returned unchanged.
    """
    bedgraph_columns = ["Chromosome", "Start", "End", "Value"]
    table = pl.read_csv(
        path,
        separator="\t",
        has_header=False,
        new_columns=bedgraph_columns,
    )
    return table

In [37]:
# Template of the bsub job-submission command for one sample; the placeholders are
# filled in per row below.
procf_barebones = 'bsub -n 8 -q short -R \"span[hosts=1]\" -R \"rusage[mem=4000]\" \"python proc_files.py {sample} {bg} {bgr1} {mask}\"'

# Print one command per row of the metadata: row label, 'RawBG' path, 'Read1BG' path
# and the shared masked-region file.
for sample, bg, bgr1 in metadat_nmumg[['RawBG', 'Read1BG']].itertuples(name=None):
    print(procf_barebones.format(sample=sample, bg=bg, bgr1=bgr1, mask=masked_region))

bsub -n 8 -q short -R "span[hosts=1]" -R "rusage[mem=4000]" "python proc_files.py mAR_dLBD_dN100_1_S46 /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles/NMuMG_AR_dLBD_dN100_1_S46.bedgraph /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles_read1/NMuMG_AR_dLBD_dN100_1_S46.bedgraph /home/labs/barkailab/vovam/Mammalian/mm10_annot/masked.bed"
bsub -n 8 -q short -R "span[hosts=1]" -R "rusage[mem=4000]" "python proc_files.py mAR_dLBD_dN100_2_S47 /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles/NMuMG_AR_dLBD_dN100_2_S47.bedgraph /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles_read1/NMuMG_AR_dLBD_dN100_2_S47.bedgraph /home/labs/barkailab/vovam/Mammalian/mm10_annot/masked.bed"
bsub -n 8 -q short -R "span[hosts=1]" -R "rusage[mem=4000]" "python proc_files.py mAR_dLBD_dN100_3_S48 /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/NMuMG_all/outfiles/NMuMG_AR_dLBD_dN100_3_S48.bedgraph /hom

# Did all of this to produce the missing averaged peaks and bigWigs for the good samples

In [211]:
# bgrouped = df1.groupby("Sample").agg(lambda x: list(x)).sort_index()
# mask = '/home/labs/barkailab/vovam/Mammalian/mm10_annot/masked.bed'

# bigwig_barebones = 'bsub -n 8 -q short -R \"span[hosts=1]\" -R \"rusage[mem=1000]\" \"bigwigAverage -b {bws} -bl {mask} -p 4 -bs 1 -o {out}\"'
# # print(bigwig_barebones)
# for name, i in zip(bgrouped.index, bgrouped.loc[:, 'BigWig'].values):
#     bws = ' '.join(i)
#     out = 'averaging_missing_bws/{}_averaged.bw'.format(name)
#     print(bigwig_barebones.format(bws=bws, mask=mask, out=out))


bsub -n 8 -q short -R "span[hosts=1]" -R "rusage[mem=1000]" "bigwigAverage -b /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD_dN300_1_S103.bam.bw /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD_dN300_2_S104.bam.bw /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD_dN300_3_S105.bam.bw /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD_dN300_4_S106.bam.bw -bl /home/labs/barkailab/vovam/Mammalian/mm10_annot/masked.bed -p 4 -bs 1 -o averaging_missing_bws/NMuMG_hGR_dLBD_dN300_averaged.bw"
bsub -n 8 -q short -R "span[hosts=1]" -R "rusage[mem=1000]" "bigwigAverage -b /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD_dN400_1_S107.bam.bw /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/bigwigs/NMuMG_hGR_dLBD

In [213]:
# chipr_barebones = 'bsub -n 2 -q short -R \"span[hosts=1]\" -R \"rusage[mem=8000]\" \"chipr -i {peaks} -o {output} -a 0.01\"'
# for name, i in zip(bgrouped.index, bgrouped.loc[:, 'Peaks'].values):
#     bws = ' '.join(i)
#     out = 'averaging_missing_peaks/{}'.format(name)
#     print(chipr_barebones.format(peaks=bws, output=out))

bsub -n 2 -q short -R "span[hosts=1]" -R "rusage[mem=8000]" "chipr -i /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN300_1_S103.clean.narrowPeak /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN300_2_S104.clean.narrowPeak /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN300_3_S105.clean.narrowPeak /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN300_4_S106.clean.narrowPeak -o averaging_missing_peaks/NMuMG_hGR_dLBD_dN300 -a 0.01"
bsub -n 2 -q short -R "span[hosts=1]" -R "rusage[mem=8000]" "chipr -i /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN400_1_S107.clean.narrowPeak /home/labs/barkailab/vovam/../LAB/data/SEQ/AchInbal_Data/20250116_NMuMG_trcs_3/cleaned_peaks/NMuMG_hGR_dLBD_dN400