# DMS-MaPseq

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../../src')
from util import *
from config import *
import plots
import ipynbname
from study_gen import study

import plotly.graph_objects as go
from plotly.subplots import make_subplots

### # Mutations per read
- histogram
- x-axis = # mutations
- y-axis = # reads


In [2]:
# Build one figure with a subplot row per sample; each row holds the trace
# returned by plots.mutations_per_read for that sample. The figure is then
# saved under the name '[A] Mutations per read'.
unique_samples = study.df['sample'].unique()
n_samples = len(unique_samples)
fig = make_subplots(
    rows=n_samples,
    cols=1,
    vertical_spacing=0.2 / n_samples,
    subplot_titles=['Number of mutations per read - {}'.format(sample) for sample in unique_samples],
)
for row, sample in enumerate(unique_samples, start=1):
    fig.add_trace(plots.mutations_per_read(study, sample), row=row, col=1)

# Called without row/col selectors, these update every subplot axis at once,
# so they are applied a single time after all traces have been added instead
# of once per sample.
fig.update_yaxes(title='Count')
fig.update_xaxes(dtick=10)

# NOTE(review): the height is fixed at 5000 px regardless of the number of
# samples, unlike the per-construct figures below which scale with row count.
fig.update_layout(autosize=True, height=5000, title='Number of mutation per read across samples')

save_plotly_fig(ipynbname.path(), '[A] Mutations per read', fig)
# fig.show()

In [3]:
# study.get_df(sample='05_1_S24_reads')
# Display the first entry of the unique sample names found in study.df
# (handy for picking a sample name to pass to study.get_df, as above).
study.df['sample'].unique()[0]

### Mutation identity at each position
- stacked bar graph (ACUG coloring)
- x-axis = position (number/base/both??)
- y-axis = # reads


In [4]:
%autoreload 2
# For every (sample, family) pair, build one figure with a subplot row per
# construct, fill each row with the bars returned by
# plots.mutation_identity_at_each_position, and save the figure under
# '[B] Mutation identity at each position/<sample>/<family>'.
for sample in study.df['sample'].unique():
    unique_families = study.get_df(sample=sample)['family'].unique()

    for fam in unique_families:
        unique_constructs = study.get_df(sample=sample, family=fam)['construct'].unique()
        n_constructs = len(unique_constructs)

        fig = make_subplots(
            rows=n_constructs,
            cols=1,
            vertical_spacing=0.2 / n_constructs,
            subplot_titles=[f'Mutation identity at each position - {cst}' for cst in unique_constructs],
        )

        for row, construct in enumerate(unique_constructs, start=1):
            muts_identity = plots.mutation_identity_at_each_position(study, sample, construct)

            for bar in muts_identity['fig']:
                fig.add_trace(bar, row=row, col=1)

            # Tick labels come from the index of the returned data, placed at
            # consecutive integer positions along this row's x-axis.
            positions = muts_identity['data'].index
            fig.update_xaxes(
                tickangle=0,
                tickvals=np.arange(len(positions)),
                ticktext=list(positions),
                tickfont={'size': 8},
                row=row,
                col=1,
            )

        # Only the first four traces get a legend entry, named in this order.
        # NOTE(review): assumes those traces are the A, C, G, T bars of the
        # first construct - confirm against the plots module.
        for trace, name in zip(fig.data[:4], ('A', 'C', 'G', 'T')):
            trace.showlegend = True
            trace.name = name

        fig.update_yaxes(title='Mutation fraction')
        fig.update_layout(barmode='stack', height=500 * n_constructs, width=1500)
        save_plotly_fig(
            ipynbname.path(),
            f'[B] Mutation identity at each position/{sample}/{fam}',
            fig,
        )
    #     fig.show()
    #     break
    # break

### Mutation fraction at each position 
- bar graph (ACUG coloring)
- x-axis = position (number/base/both??)
- y-axis = mutation fraction


In [5]:
# For every (sample, family) pair, build one figure with a subplot row per
# construct, fill each row with the bars returned by
# plots.mutation_fraction_at_each_position, and save the figure under
# '[C] Mutation fraction at each position/<sample>/<family>'.
for sample in study.df['sample'].unique():
    unique_families = study.get_df(sample=sample)['family'].unique()

    for fam in unique_families:
        unique_constructs = study.get_df(sample=sample, family=fam)['construct'].unique()
        n_constructs = len(unique_constructs)

        fig = make_subplots(
            rows=n_constructs,
            cols=1,
            vertical_spacing=0.2 / n_constructs,
            subplot_titles=[f'Mutation fraction at each position - {cst}' for cst in unique_constructs],
        )

        for row, construct in enumerate(unique_constructs, start=1):
            # The result keeps the name used by the mutation-identity cell.
            muts_identity = plots.mutation_fraction_at_each_position(study, sample, construct)

            for bar in muts_identity['fig']:
                fig.add_trace(bar, row=row, col=1)

            # Tick labels come from the index of the returned data, placed at
            # consecutive integer positions along this row's x-axis.
            positions = muts_identity['data'].index
            fig.update_xaxes(
                tickangle=0,
                tickvals=np.arange(len(positions)),
                ticktext=list(positions),
                tickfont={'size': 8},
                row=row,
                col=1,
            )

        # Only the first four traces get a legend entry, named in this order.
        # NOTE(review): assumes those traces are the A, C, G, T bars of the
        # first construct - confirm against the plots module.
        for trace, name in zip(fig.data[:4], ('A', 'C', 'G', 'T')):
            trace.showlegend = True
            trace.name = name

        fig.update_yaxes(title='Mutation fraction')
        fig.update_layout(barmode='stack', height=500 * n_constructs, width=1500)
        save_plotly_fig(
            ipynbname.path(),
            f'[C] Mutation fraction at each position/{sample}/{fam}',
            fig,
        )
    #     fig.show()
    #     break
    # break

### Read coverage per position
- bar graph
- x-axis = position (number/base/both??)
- y-axis = coverage fraction

In [13]:
# For every (sample, family) pair, build one figure with a subplot row per
# construct, fill each row with the bars returned by
# plots.read_coverage_per_position, and save the figure under
# '[D] Read coverage per position/<sample>/<family>'.
for sample in study.df['sample'].unique():
    unique_families = study.get_df(sample=sample)['family'].unique()

    for fam in unique_families:
        unique_constructs = study.get_df(sample=sample, family=fam)['construct'].unique()
        n_constructs = len(unique_constructs)

        fig = make_subplots(
            rows=n_constructs,
            cols=1,
            vertical_spacing=0.2 / n_constructs,
            subplot_titles=['Read coverage per position - {}'.format(cst) for cst in unique_constructs],
        )

        # Section names of the FIRST construct. The legend below labels the
        # first traces of the figure, which were added for the first construct,
        # so their names must come from that construct rather than from
        # whichever construct happened to be processed last.
        legend_sections = None
        for row, construct in enumerate(unique_constructs, start=1):
            read_coverage = plots.read_coverage_per_position(study, sample, construct)

            for bar in read_coverage['fig']:
                fig.add_trace(bar, row=row, col=1)

            if legend_sections is None:
                legend_sections = list(read_coverage['data']['section'])

        # Print a legend entry for each section.
        # NOTE(review): assumes each construct contributes one trace per entry
        # of data['section'], in the same order - confirm against the plots module.
        for trace, name in zip(fig.data, legend_sections):
            trace.update(showlegend=True)
            trace["name"] = name

        fig.update_yaxes(title='Read coverage')
        fig.update_layout(barmode='stack', height=500 * n_constructs, width=1300)
        save_plotly_fig(ipynbname.path(), '[D] Read coverage per position/{}/{}'.format(sample, fam), fig)
    #     fig.show()
    #     break
    # break