## Get started
### Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
from os.path import exists, dirname
import os, sys
import numpy as np
import seaborn as sns
import json
path = os.path.dirname((os.path.abspath('')))
print(path)
sys.path.append(path)
from dreem_nap.study import Study
import yaml

### Configuration
- Load config file and check what's in it
- Configure the notebook

In [None]:
# Read the notebook settings from config.yml in the current working directory.
with open('config.yml', 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
# Print each setting as "key ____ value"; the key is followed by (30 - len(key)) underscores so values line up.
for k,v in cfg.items():
    print(k,(30-len(k))*'_',v)

# Apply the matplotlib defaults stored in the config file.
mpl.rcParams['figure.dpi'] = cfg['mpl_rcParams_figure_dpi'] # the higher the resolution, the slower the plotting
mpl.rcParams["figure.figsize"] = cfg['mpl_rcParams_figure_figsize'] # (width, height) in inches

### Load data
Create and load dataframe of a single study as a demo

In [None]:
from dreem_nap.study import Study

# Create a study
salt = Study().from_dict({'name': 'salt',
                         'description': 'Change the Na concentration', 
                         'samples': ['A6', 'B6', 'C6', 'D6', 'E6'], 
                         'title': 'Na quantity [M]', 
                         'conditions': [0.15, 0.3, 0.6, 1.0, 1.2]})

# Load data
salt.load_df_from_local_files(path_to_data= cfg['path_to_data'], 
                              min_cov_bases= cfg['min_cov_bases'])

# Show the dataframe
salt.df.head()

In [1]:
#! usr/bin/env python3

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
from os.path import exists, dirname
import os, sys
import numpy as np
import seaborn as sns
import json

path = '/Users/ymdt/src/dreem_nap/'

sys.path.append(path)
from dreem_nap.manipulator import Manipulator

from dreem_nap.study import Study, util
import yaml
import pickle

#mpl.use('agg')


with open(path+'config.yml', 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)

mpl.rcParams['figure.dpi'] = cfg['mpl_rcParams_figure_dpi'] # the highest the resolution, the slowest the plotting

####
# SET HYPER PARAMETERS HERE
####

studies = Study.load_studies(cfg['path_to_studies'])
study = Study().from_dict(studies['3UTR +/- DMS'].__dict__)
study.load_df_from_local_files(path_to_data= cfg['path_to_data'], min_cov_bases = cfg['min_cov_bases'], filter_by='sample')

#with open(path+'data/temperature_df.p','rb') as f:
##    study._df = pickle.load(f)
 #   f.close()

study.constructs = study._df['construct'].unique()

samp, construct = 'D7','1988'

study._df['cluster'] = 0

# base_type = ['A','C','G','T']

# base_index = 'roi', 'all', [93,95,96]
# base_paired = True, False or None (=both) # default is None 
# figsize = (25, 7) # custom by plot type

# structure = "structure_ROI"

# deltaG = "deltaG_ens_DMS"
# cluster = 0, 1, 2

print(studies)


{'3UTR +/- DMS': <dreem_nap.study.Study object at 0x29a530370>, '3UTR_v_5UTR': <dreem_nap.study.Study object at 0x29ba523e0>, '5UTR +/- DMS': <dreem_nap.study.Study object at 0x29ba51ea0>, 'all': <dreem_nap.study.Study object at 0x29ba52680>}


In [None]:
get_col_across_constructs

In [None]:
# Export one mutation histogram (positions 19-41) per (sample, construct) pair found in the study dataframe.
df = study.get_df()
for s in df.samp.unique():
    for c in df.construct.unique():
        study.plot.mut_histogram(samp=s, construct=c, plot_type='index', index = list(range(19,42)), figsize=(25, 7), grid=True)
        util.save_fig(f"/Users/ymdt/src/data/figs/Lauren/date/mutation histograms/{s}_{c}_index.png")
        # Release the figure(s) created for this pair; otherwise every figure of the
        # loop stays open in memory until the cell finishes.
        plt.close('all')

In [None]:
study.mani.get_SCC(samp=samp, construct=construct, cols=['mut_rates'], base_type=base_type, index=index, sub_lib=['PP7 canonical bp variants','G quadraplex variants'])

In [None]:
# Plot one construct and dump the plotted data to CSV.
out = study.plot.mut_histogram(samp=470, construct='3114-O-flank_1=hp7-DB', plot_type='index', index = list(range(19,42)), figsize=(25, 7), grid=True)
# DataFrame.to_csv returns None when given a path, so wrapping it in print() only printed "None".
out.data.to_csv('hi_lauren.csv')

In [21]:
# Gather the per-base mutation rates (positions 19-41) of every construct for sample 470,
# then concatenate once instead of growing the frame inside the loop (which re-copies
# all previous rows at every iteration).
rows = [
    pd.DataFrame(study.mani.get_SCC(samp=470, construct=c, cols=['mut_rates'], index=list(range(19,42))).T)
    for c in study.constructs
]
# pd.concat raises on an empty list, so keep the original "empty frame" result in that case.
stack = pd.concat(rows) if rows else pd.DataFrame()
stack.index = study.constructs
stack

Unnamed: 0,19,20,21,22,23,24,25,26,27,28,...,32,33,34,35,36,37,38,39,40,41
3114-O-flank_1=hp7-DB,0.003974,0.005961,0.011426,0.001490,0.008942,0.000497,0.000497,0.050174,0.006955,0.002484,...,0.070045,0.018381,0.011426,0.006458,0.008445,0.000497,0.008445,0.000000,0.001987,0.008445
3482-O-flank_1=lp11-DB,0.003414,0.007853,0.017071,0.001024,0.027313,0.001707,0.003756,0.050529,0.003073,0.001366,...,0.063844,0.032093,0.014681,0.017412,0.020143,0.000683,0.016046,0.001024,0.009218,0.016729
3091-CC-flank_1=hp5-DB,0.002505,0.005369,0.013958,0.000716,0.018611,0.001790,0.002505,0.037938,0.007874,0.005369,...,0.065116,0.033274,0.021467,0.013953,0.011449,0.000000,0.011807,0.000358,0.002862,0.008587
3124-O-flank_1=hp7-DB,0.005364,0.011494,0.013022,0.000766,0.016469,0.000766,0.005360,0.043645,0.004211,0.006508,...,0.065467,0.027565,0.005360,0.008037,0.015691,0.002679,0.018752,0.000000,0.000765,0.008416
3546-O-flank_1=bi4-rre-DB,0.002732,0.008197,0.018443,0.000683,0.011612,0.000000,0.002732,0.061433,0.002730,0.003413,...,0.064846,0.023891,0.011604,0.010922,0.018430,0.000682,0.014325,0.000000,0.000000,0.007503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3318-CC-flank_1=cb10-DB,0.001178,0.010264,0.022211,0.000505,0.022544,0.001346,0.002692,0.056023,0.003365,0.002692,...,0.057023,0.019849,0.009588,0.012111,0.018839,0.000336,0.012447,0.003701,0.013457,0.021355
3417-O-flank_1=lp6-DB,0.003600,0.005760,0.026638,0.000000,0.014399,0.000720,0.001440,0.059755,0.041037,0.002880,...,0.061915,0.030238,0.017279,0.011519,0.007919,0.001440,0.016559,0.098632,0.000720,0.011519
3376-O-flank_1=lp2-DB,0.003297,0.008350,0.018457,0.000000,0.015601,0.001318,0.002637,0.041749,0.003735,0.000879,...,0.061731,0.019552,0.010105,0.013837,0.009225,0.000220,0.019767,0.000659,0.004393,0.010982
3352-O-flank_1=cb12-DB,0.002966,0.004745,0.007711,0.001186,0.010083,0.000000,0.001779,0.055160,0.004152,0.004745,...,0.068802,0.020154,0.008299,0.007113,0.008299,0.000000,0.009484,0.000000,0.000000,0.011263


In [19]:
# For several minimum-coverage thresholds, regress the mutation rates (positions 19-41)
# of the second sample of each study against the first one, construct by construct,
# and save one scatter + fit plot per construct.
mpl.use('agg')
from scipy import stats
studies = Study.load_studies(cfg['path_to_studies'])

for min_base_cov in [500, 1000, 2000, 3000, 4000, 5000]:
    for s in studies.values():
        s.load_df_from_local_files(path_to_data= cfg['path_to_data'], min_cov_bases = min_base_cov)
        df = s.get_df()
        df['cluster'] = 0
        for c in s.constructs: 
            x = np.array(s.mani.get_SCC(samp=s.samples[0], construct=c, cols=['mut_rates'], can_be_empty=True, index=list(range(19,42)))).reshape(-1)
            y = np.array(s.mani.get_SCC(samp=s.samples[1], construct=c, cols=['mut_rates'], can_be_empty=True, index=list(range(19,42)))).reshape(-1)
            # can_be_empty=True means a sample may yield no data; a regression needs at
            # least two paired points, so skip constructs that cannot be fitted.
            if x.size < 2 or x.size != y.size:
                continue
            # One fresh figure per construct: drawing on the implicit current axes made
            # every saved plot also contain the points of all previous constructs.
            fig, ax = plt.subplots()
            ax.plot(x,y, '.')
            res = stats.linregress(x, y)
            ax.plot(x, res.slope*x + res.intercept, '-')
            ax.set_title(f"{c}       R^2 = {res.rvalue**2:.5f}")
            ax.set_xlabel(s.samples[0])
            ax.set_ylabel(s.samples[1])
            ax.legend(['data',f"fit: y={res.slope:.5f}x+{res.intercept:.5f}"])
            fig.tight_layout()
            # NOTE(review): util.save_fig is assumed to save the current figure (fig) — confirm.
            util.save_fig(f"/Users/ymdt/src/data/figs/Lauren/date/correlation/{min_base_cov}/{s.samples[0]}_{s.samples[1]}/{c}.png")
            plt.close(fig)

532 constructs found across all samples for study 3UTR +/- DMS.
528 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


420 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)
  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


393 constructs found across all samples for study all.
374 constructs found across all samples for study 3UTR +/- DMS.
393 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


221 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)
  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


179 constructs found across all samples for study all.
109 constructs found across all samples for study 3UTR +/- DMS.
188 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


40 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)
  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


18 constructs found across all samples for study all.
25 constructs found across all samples for study 3UTR +/- DMS.
80 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


7 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)
  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


No construct found across all samples for study all.
2 constructs found across all samples for study 3UTR +/- DMS.
30 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


2 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)
  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


No construct found across all samples for study all.
No construct found across all samples for study 3UTR +/- DMS.
15 constructs found across all samples for study 3UTR_v_5UTR.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


1 constructs found across all samples for study 5UTR +/- DMS.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


No construct found across all samples for study all.


  df['mut_rates'] = df.apply(lambda x: np.divide(x['mut_bases'],x['info_bases']), axis=1)


In [7]:
x,y

(array([[0.00397417],
        [0.00596125],
        [0.01142573],
        [0.00149031],
        [0.00894188],
        [0.00049677],
        [0.00049677],
        [0.05017387],
        [0.00695479],
        [0.00248385],
        [0.10183805],
        [0.00298063],
        [0.09538003],
        [0.07004471],
        [0.01838053],
        [0.01142573],
        [0.00645802],
        [0.00844511],
        [0.00049677],
        [0.00844511],
        [0.        ],
        [0.00198708],
        [0.00844511]]),
 array([[0.00397417],
        [0.00596125],
        [0.01142573],
        [0.00149031],
        [0.00894188],
        [0.00049677],
        [0.00049677],
        [0.05017387],
        [0.00695479],
        [0.00248385],
        [0.10183805],
        [0.00298063],
        [0.09538003],
        [0.07004471],
        [0.01838053],
        [0.01142573],
        [0.00645802],
        [0.00844511],
        [0.00049677],
        [0.00844511],
        [0.        ],
        [0.00198708],
        

In [None]:




def assert_structure(df, structure):
    """Raise AssertionError unless `structure` is one of the columns of `df`."""
    is_known = structure in df.columns
    assert is_known, f"Structure {structure} not found"


def assert_deltaG(df, deltaG):
    """Raise AssertionError unless `deltaG` is one of the columns of `df`."""
    is_known = deltaG in df.columns
    assert is_known, f"deltaG {deltaG} not found"


def define_index(df, samp, construct, cluster, index):
    """Turn the `index` selector into the row labels to keep.

    Args:
        df: frame whose own index is returned for 'all' / 'full'.
        samp, construct, cluster: forwarded to get_series to fetch the row that
            holds 'ROI_start', 'ROI_stop' and 'sequence'.
        index: 'all' or 'full' (every row of `df`), 'roi' (positions from
            ROI_start up to, but excluding, ROI_stop), or a list/tuple of positions.

    Returns:
        `df.index`, a list of ROI positions, or `index` itself when it is a list/tuple.

    Raises:
        AssertionError: the ROI bounds are missing from the row, or a requested
            position falls outside the sequence.
        ValueError: `index` is none of the supported selectors.
    """
    if index in ['all','full'] :
        return df.index
    if index == 'roi':
        series = get_series(df, samp, construct, cluster)
        # The previous `assert [ ... ]` tested a non-empty list, which is always truthy,
        # so nothing was validated. Check the row the bounds are actually read from.
        assert all(roi in series for roi in ['ROI_start','ROI_stop']), 'ROI_start and ROI_stop not found'
        return list(range(int(series['ROI_start']), int(series['ROI_stop'])))
    if type(index) in [list,tuple]:
        valid_positions = range(len(get_series(df, samp, construct, cluster)['sequence']))
        # all(...) makes the bounds check effective (a bare list was always truthy).
        assert all(i in valid_positions for i in index), 'Index out of range'
        return index
    raise ValueError(f"Index {index} not recognized")

def filter_base_paired(df_loc, base_paired):
    """Keep only rows whose 'paired' flag equals `base_paired`.

    `base_paired` equal to True keeps paired rows, equal to False keeps unpaired
    rows; any other value (None in particular) returns `df_loc` untouched.
    """
    for wanted in (True, False):
        if base_paired == wanted:
            return df_loc[df_loc['paired'] == wanted]
    return df_loc

def filter_index(df_loc, index):
    """Return the rows of `df_loc` selected by label through `.loc[index]`."""
    selected = df_loc.loc[index]
    return selected

def filter_base_type(df_loc, base_type):
    """Keep the rows whose 'base' is listed in `base_type`.

    Rows come back grouped in the order of `base_type` (all rows of the first
    base, then the second, ...), not in their original order.
    """
    per_base = []
    for base in base_type:
        per_base.append(df_loc[df_loc['base'] == base])
    return pd.concat(per_base, axis=0)

def filter(df_loc, base_type, index, base_paired):
    """Apply the index, pairing and base-type filters, in that order.

    Note: this name shadows the built-in `filter` for the rest of the notebook.
    """
    by_index = filter_index(df_loc, index)
    by_pairing = filter_base_paired(by_index, base_paired)
    return filter_base_type(by_pairing, base_type)

def get_df(df, samp, construct, cols, cluster=0, structure='structure', deltaG='deltaG', base_type = ['A','C','G','T'], index='all', base_paired=None):
    """Build a filtered, per-position DataFrame for one (samp, construct, cluster) row.

    Args:
        df: table checked for the requested column names.
        samp, construct, cluster: identify the row, via get_series.
        cols: columns to extract. Entries starting with 'deltaG' or 'structure'
            are dropped and replaced by the `structure` and `deltaG` arguments.
        structure: name of the structure column used to derive 'paired'.
        deltaG: name of the deltaG column to include.
        base_type: bases to keep (matched against the 'base' column).
            NOTE(review): mutable default list; it is only read here, never modified.
        index: selector understood by define_index ('all', 'full', 'roi' or list/tuple).
        base_paired: True, False or None, forwarded to filter_base_paired.

    Returns:
        The filtered DataFrame, sorted by index.

    Raises:
        AssertionError: a requested column is not in `df`.
        KeyError: the structure string contains a character other than '.', '(' or ')'.
    """

    assert_structure(df, structure)
    assert_deltaG(df, deltaG)
    # Replace any structure*/deltaG* entries of `cols` by the single pair chosen through the arguments.
    cols = [c for c in cols if not (c.startswith('deltaG') or c.startswith('structure'))]
    cols = cols + [structure, deltaG]

    for col in cols:
        assert col in df.columns, f"Column {col} not found"

    df_loc = get_series(df, samp, construct, cluster)
    # String cells are split into one character per position.
    for col in [c for c in cols if type(df_loc[c]) in [str]]:
        df_loc[col] = list(df_loc[col])

    # NOTE(review): scalar cells (e.g. deltaG) are presumably broadcast to every row here — confirm.
    df_loc = pd.DataFrame({col: df_loc[col] for col in cols})

    # Derive the boolean 'paired' column from the dot-bracket characters, then drop the raw structure column.
    for st in [col for col in cols if 'structure' in col]:
        df_loc['paired'] = [{'.':False,'(':True,')':True}[x] for x in df_loc[st]]
        df_loc = df_loc.drop(columns=st)
    
    # filter_base_type reads a 'base' column, so `cols` needs to contain 'sequence' for the filter to work.
    df_loc = df_loc.rename(columns={'sequence':'base'})
    
    # NOTE(review): define_index receives the per-position frame df_loc, not the original `df`.
    index = define_index(df_loc, samp, construct, cluster, index)
    df_loc = filter(df_loc, base_type, index, base_paired)
    return df_loc.sort_index()

def get_series(df, samp, construct, cluster):
    """Return the single row matching (construct, samp, cluster).

    Args:
        df: table to search. It is used when it carries the 'construct', 'samp'
            and 'cluster' columns. Otherwise the lookup falls back to the
            notebook-level `study.df`, which is what this function always read
            before; define_index relies on that fallback because it passes a
            per-position frame without those columns.
        samp, construct, cluster: values the row must match.

    Returns:
        The matching row as a pandas Series.

    Raises:
        AssertionError: zero rows, or more than one row, match.
    """
    key_cols = ('construct', 'samp', 'cluster')
    has_keys = all(col in getattr(df, 'columns', ()) for col in key_cols)
    table = df if has_keys else study.df
    # Computed outside the assert so the lookup still happens when asserts are disabled (python -O).
    df_out = table[(table['construct'] == construct)&(table['samp'] == samp)&(table['cluster'] == cluster)]
    assert len(df_out) <= 1, 'More than one row found'
    assert len(df_out) >= 1, 'No row found'
    return df_out.iloc[0]


class OutputPlot:
    """Plain container for what a plotting call produced: figure, axis and plotted data."""

    def __init__(self, fig, ax, data) -> None:
        # Keep the three artefacts as public attributes.
        self.fig, self.ax, self.data = fig, ax, data

from dreem_nap import manipulator

class TestPlotter():
    """Experimental plotting mixin.

    `mut_histogram` reads `self.df`, which this class does not define: it must be
    provided by the class it is combined with.
    """

    def __init__(self):
        self.__man = manipulator.Manipulator()

    def mut_histogram(self, samp:str, construct:str, plot_type:str='index', figsize=(35,7), **kwargs)->'OutputPlot':
        """Plot the mutation rate of a specific (sample, construct).

        Args:
            samp: sample of interest.
            construct: construct of interest.
            plot_type: 'index' or 'partition'. 
                - 'index' uses bases numbers as index and the original construct bases as colors.
                - 'partition' uses original sequence bases as index and the partition of mutated bases as colors.
            figsize: figure size.
            **kwargs: every key that is the name of a matplotlib.pyplot function is
                called with its value (e.g. grid=True -> plt.grid(True)). Other keys
                (such as base_type, index, base_paired) are accepted but not used here.
        
        Returns:
            OutputPlot: output plot data:
                - fig: figure object.
                - ax: axis object.
                - data: plotted data.

        Raises:
            Exception: plot_type is neither 'index' nor 'partition'.
        """
        # Validate before creating anything so a bad argument does not leave a figure behind.
        if not plot_type in ['index','partition']:
            raise Exception(f"{plot_type} must be 'index' or 'partition', please check this argument")

        df_use = self.df.set_index(['samp','construct'])
        df_hist = pd.DataFrame()

        # Create the figure and axis together and draw on that axis. Previously
        # plt.figure() made an empty figure and DataFrame.plot.bar(figsize=...) made a
        # second one, so the returned `fig` did not contain `ax`.
        fig, ax = plt.subplots(figsize=figsize)

        if plot_type == 'index':  # Plot the mutation rate for each base along the sequence

            mut_per_base = pd.DataFrame({'mut_rates': df_use['mut_rates'].loc[samp, construct]
                                        ,'base':list(df_use['sequence'].loc[samp, construct])})\
                                        .reset_index()\
                                        .set_index(['base', 'index'])
            df_hist.index = mut_per_base.reset_index()['index']

            # One column per base; positions holding another base stay NaN.
            for base in ['A','C','G','T']:
                df_hist[base] = pd.Series(dtype=float)
                df_hist[base] = mut_per_base.loc[base]

            df_hist.plot.bar(stacked=True, color=['r','b','y','g'], ax=ax)
            ax.set_title(f"sample {samp}, construct {construct}")

        if plot_type == 'partition': # Plot the partition of mutations for each base along the sequence
            for base in ['A','C','G','T']:
                # NOTE(review): [1:] drops the first element of both arrays — presumably an offset entry; confirm.
                df_hist[f"mod_bases_{base}"]  = np.array(df_use[f"mod_bases_{base}"].loc[samp, construct][1:])/df_use['info_bases'].loc[samp, construct][1:]

            df_hist.index = list(df_use['sequence'].loc[samp,construct])

            df_hist.plot.bar(stacked=True, color=['r','b','y','g'], ax=ax)

        # Forward pyplot-named keyword arguments; `ax` is the current axes at this point.
        for arg, value in kwargs.items():
            if hasattr(plt, arg):
                getattr(plt, arg)(value)

        return OutputPlot(fig, ax, df_hist)


class TestStudy(Study, TestPlotter):
    """Study combined with the experimental TestPlotter plotting methods."""

cols = ['mut_bases','cov_bases', 'sequence', 'structure', 'deltaG']
print(get_df(df=study.df, samp=samp, construct=construct, cluster=0, cols=cols, index='all', base_paired=None, base_type=['A','C','G','T'], deltaG='deltaG', structure='structure'))
#df = pd.DataFrame(df_loc[cols].str.split('',1),columns=cols)


## Make plots
Plot this study with different plots. Check out the list of plots in the plot module

### Mutation histogram

In [None]:
salt.mut_histogram(samp='A6', construct='7695',\
             plot_type='index', figsize=(28,4))

### DeltaG
DeltaG vs mutation rate for all constructs of a sample

In [None]:
salt.deltaG(samp='A6',bases_type=['A','C'], roi_range='all')  # currently bugged due to DREEM, work in progress

### DeltaG_basewise
Plot the mutation rate of each base of the ROI that is predicted to be paired, for each construct of a sample, against the estimated deltaG.

In [None]:
salt.deltaG_basewise(samp='A6', roi_range=[94,95,96,97])

###  Heatmap
Here is a heatmap of the minimum base coverage across all samples. `column` can be any column that contains a single scalar value.

In [None]:
salt.heatmap(column='min_cov_bases') 

### Mutation rate vs 1-base_pairing
Plot a mutation rate histogram, a 1-base_pairing probability histogram, and a scatter plot fitting the mutation rate vs 1-base_pairing. 

In [None]:
# currently bugged due to DREEM, work in progress
salt.mut_rate_vs_base_non_pairing_prob(samp='A6', construct='7695') # shows the mutation rate vs base non-pairing probability

### Base coverage
Plot the base coverage of a specific (sample, construct)

In [None]:
salt.base_coverage(samp='A6', construct='9572')

### Base coverage for all constructs
Plot the base-coverage of the worst-covered base of the Region of Interest, for each construct. 

In [None]:
salt.base_coverage_ROI_for_all_constructs()


In [None]:
salt.random_9_base_coverage()

In [None]:
random_9_base_coverage()
sample_coverage_distribution()
valid_construct_per_sample()
sliding_window_r2_gini()
study_base()
study_sample()
base_wise_mut_vs_prob()
correlation_n_samples()


In [None]:
salt.study_base(construct='9572', structure='full',
                roi_range=[40,63,78,94])

### You can load every study from a file using Study.load_studies()

In [None]:
studies = Study.load_studies(cfg['path_to_studies'])
for study in studies.values():
    if study.name != 'all_samples':
        study.load_df_from_local_files(path_to_data= cfg['path_to_data'], 
                                   min_cov_bases= cfg['min_cov_bases'])

studies['temperature'].df.head()

### Studies can be called from the dictionary using their name

In [None]:
studies['temperature'].mut_histogram(studies['temperature'].samples[0], '9572', 'index')

In [None]:
for study in studies.values():
    if study.name != 'all_samples':
        for s in study.samples:
            for construct in study.constructs:
                study.mut_histogram(s, construct, 'index')
                util.save_fig(f"data/figs/date/mutation histogram/{study.name}/{s}/{construct}.png")
                plt.close()


In [None]:
class TestStudy(Study):
    # Study subclass carrying a notebook-local version of mut_histogram.
    def mut_histogram(self, samp:str, construct:str, plot_type:str, figsize=(35,7))->'mpl.axes.Axes':
        """Plot the mutation rate of a specific (sample, construct).

        Args:
            samp: sample of interest.
            construct: construct of interest.
            plot_type: 'index' or 'partition'. 
                - 'index' uses bases numbers as index and the original construct bases as colors.
                - 'partition' uses original sequence bases as index and the partition of mutated bases as colors.
            figsize: figure size passed to the pandas bar plot.

        Returns:
            The object returned by DataFrame.plot.bar (a matplotlib Axes).

        Raises:
            Exception: plot_type is neither 'index' nor 'partition'.
        """

        # Index rows by (samp, construct) so a single row can be addressed with .loc[samp, construct].
        df_use = self.df.set_index(['samp','construct'])
        
        if not plot_type in ['index','partition']:
            raise Exception(f"{plot_type} must be 'index' or 'partition', please check this argument")

        if plot_type == 'index':  # Plot the mutation rate for each base along the sequence

            mut_per_base = pd.DataFrame({'mut_rates': df_use['mut_rates'].loc[samp, construct]
                                        ,'base':list(df_use['sequence'].loc[samp, construct])})\
                                        .reset_index()\
                                        .set_index(['base', 'index'])
            df_hist = pd.DataFrame()
            df_hist.index = mut_per_base.reset_index()['index']

            # One column per base; positions holding another base stay NaN.
            for base in ['A','C','G','T']:
                df_hist[base] = pd.Series(dtype=float)
                df_hist[base] = mut_per_base.loc[base]

            #df_hist.index = mut_per_base.reset_index()['base']

            ax = df_hist.plot.bar(stacked=True, color=['r','b','y','g'],  figsize=figsize)
            plt.title(f"sample {samp}, construct {construct}")

        if plot_type == 'partition': # Plot the partition of mutations for each base along the sequence
            df_hist = pd.DataFrame()
            for base in ['A','C','G','T']:
                # NOTE(review): [1:] drops the first element of both arrays — presumably an offset entry; confirm.
                df_hist[f"mod_bases_{base}"]  = np.array(df_use[f"mod_bases_{base}"].loc[samp, construct][1:])/df_use['info_bases'].loc[samp, construct][1:]

            df_hist.index = list(df_use['sequence'].loc[samp,construct])

            ax = df_hist.plot.bar(stacked=True, color=['r','b','y','g'], figsize=figsize)

        return ax


# Load configuration
with open('config.yml', 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
for k,v in cfg.items():
    print(k,(30-len(k))*'_',v)

mpl.rcParams['figure.dpi'] = cfg['mpl_rcParams_figure_dpi'] # the highest the resolution, the slowest the plotting

# Create a study
salt = TestStudy().from_dict({'name': 'salt',
                         'description': 'Change the Na concentration', 
                         'samples': ['A6', 'B6', 'C6', 'D6', 'E6'], 
                         'title': 'Na quantity [M]', 
                         'conditions': [0.15, 0.3, 0.6, 1.0, 1.2]})

# Load data
salt.load_df_from_local_files(path_to_data= cfg['path_to_data'], 
                              min_cov_bases= cfg['min_cov_bases'])

# Show the dataframe
salt.df.head()

In [None]:
import pickle
from os import listdir

listdir('../data/DEMULTIPLEXED/')

with open(f"../data/DEMULTIPLEXED/A4/mh.p",'rb') as f:
    pick =pickle.load(f)
    print(dir(pick['9572']))


In [None]:
for s in salt.samples:
    salt.mut_histogram(s, '9572', 'index')