# Differential Expression and LogFC Calculation
* Create common plots with DE results (Volcano, MA, etc.).
* Create plots for QCing pydeseq2 (specifically problems with logfc calculations).
* Recalculate TPM with normalized counts and save to layer.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import lines
import anndata as ad
import scanpy as sc
from itertools import combinations
import warnings

from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot

# Initialise plotly's offline notebook mode so the iplot() calls in later cells can
# render their figures.
init_notebook_mode(connected=True)
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides numeric warnings from the log2/division steps in later
# cells (e.g. divide-by-zero) -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

In [None]:
# Notebook-wide settings: worker count, significance cut-offs and file locations.
NUM_CPUS = 8

# A feature is highlighted when |log2 FC| exceeds LOG2_FC_THRESH (a 2-fold change)
# and -log10(padj) exceeds NLOG10_PADJ_THRESH (i.e. padj below 0.05).
LOG2_FC_THRESH = np.log2(2.0)
NLOG10_PADJ_THRESH = -np.log10(0.05)

# Dataset directory to analyse. Alternatives that have been used here:
#   os.getcwd()
#   '/data/expression_atlas/v1/GSE122459'
#   '/data/expression_atlas/v1/GSE110914/'
#   '/data/expression_atlas/v1/GSE162828/'
#   '/data/expression_atlas/v1/GSE102371/'
DATA_PATH = '/data/expression_atlas/v1/GSE80183/'

# Result files share the prefix de_results/<last path component of DATA_PATH>.
RESULTS_PATH = f"de_results/{DATA_PATH.rstrip('/').rsplit('/', 1)[-1]}"

DDS_TRANSCRIPT_FH = f'{RESULTS_PATH}_dds_transcript.h5_ad'
DDS_GENE_FH = f'{RESULTS_PATH}_dds_gene.h5_ad'

In [None]:
# Load the previously created transcript- and gene-level dds objects. Both are expected
# to be filtered already and to carry the correct metadata in obs.
dds, dds_gene = (ad.read_h5ad(path) for path in (DDS_TRANSCRIPT_FH, DDS_GENE_FH))

# Contrast definitions are taken from the transcript-level object and reused for both.
contrasts = dds.uns['contrasts']


### Fetch Ensembl gene ID to external gene name mappings from BioMart.

In [None]:
# Fetch ensembl gene id - external gene name mappings from biomart.
#
# NOTE(review): `Dataset` is not imported anywhere in the visible notebook, so this cell
# raises NameError unless the name is defined elsewhere. The call pattern (name=/host=,
# then .query(attributes=...)) looks like pybiomart's Dataset -- presumably
# `from pybiomart import Dataset` is missing from the import cell; confirm and add it.

# Handle for the 'hsapiens_gene_ensembl' dataset on the www.ensembl.org host.
dataset = Dataset(
                name='hsapiens_gene_ensembl',
                host='http://www.ensembl.org',
            )

# Request two attributes per gene: the Ensembl gene id and the external gene name.
external_gene_mapping = dataset.query(
                                attributes=['ensembl_gene_id', 'external_gene_name']
                            )

# Rename the 'Gene stable ID' / 'Gene name' columns to gene_id / gene_name, in place.
external_gene_mapping.rename({'Gene stable ID': 'gene_id', 'Gene name': 'gene_name'}, axis=1, inplace=True)

# Bare expression: shown as the cell output.
# NOTE(review): no later cell visible here references external_gene_mapping.
external_gene_mapping

### DE2.1 Volcano plots for all contrasts.

In [None]:
# Create volcano plots of DE-transcripts and -genes.
#
# One row per contrast; the left column is transcript-level, the right column gene-level.
# x = log2 fold change, y = -log10(padj), marker area = scale_marker * log2(baseMean).
# Blue marks features with |log2 FC| > LOG2_FC_THRESH and -log10(padj) > NLOG10_PADJ_THRESH;
# everything else (including rows where either value is NaN) is orange.

fig, ax = plt.subplots(len(contrasts), 2, figsize=(10, 5*len(contrasts)))

# With a single contrast plt.subplots returns a 1-D array; force (row, col) indexing.
ax = ax.reshape((-1, 2,))

scale_marker = 2

for i, k in enumerate(contrasts.keys()):

    # (results table, title template, marker alpha) for the two columns of this row.
    panels = (
        (dds.uns['stat_results'][k], '%s Transcript', 0.1),
        (dds_gene.uns['stat_results'][k], '%s Gene', 0.05),
    )

    for col, (results, title, alpha) in enumerate(panels):

        # Vectorised threshold test; replaces a per-row Python loop over iterrows().
        is_significant = (
            (results['log2FoldChange'].abs() > LOG2_FC_THRESH)
            & (results['-log10_padj'] > NLOG10_PADJ_THRESH)
        )

        # NOTE(review): baseMean < 1 gives a negative marker size here -- confirm the
        # upstream filtering guarantees baseMean >= 1.
        ax[i, col].scatter(
            results['log2FoldChange'],
            results['-log10_padj'],
            alpha=alpha,
            s=scale_marker*np.log2(results['baseMean']),
            c=np.where(is_significant, '#1f77b4', '#ff7f0e').tolist(),
        )

        ax[i, col].set_xlabel('log2 FC')
        ax[i, col].set_title(title % k)

    ax[i, 0].set_ylabel('-log10 padj')

    # Four reference sizes from 1 up to the gene-level maximum log2(baseMean), rounded to
    # the nearest multiple of 5.
    element_range = np.rint(np.linspace(
        1,
        5*round(max(np.log2(dds_gene.uns['stat_results'][k]['baseMean']))/5),
        4,
    ))

    # scatter's `s` is an area while Line2D's markersize is a diameter, hence the sqrt.
    legend_elements = [
        lines.Line2D(
            [0],
            [0],
            lw=0,
            marker="o",
            linestyle=None,
            markersize=(scale_marker*s)**0.5,
        )
        for s in element_range
    ]

    legend = ax[i, 1].legend(
        legend_elements,
        element_range,
        frameon=False,
        loc='upper left',
        bbox_to_anchor=(1., 1.),
        title='log2 mean expression',
    )
    # Re-add the size legend as an artist so the colour legend below does not replace it.
    ax[i, 1].add_artist(legend)

    # Labels state the actual classification rule: blue needs BOTH thresholds passed on
    # |log2FC|; orange is the complement (either threshold not passed).
    color_legend = ax[i, 1].legend(
        [
            lines.Line2D([0], [0], lw=0, marker='o', linestyle=None, markerfacecolor='#1f77b4'),
            lines.Line2D([0], [0], lw=0, marker='o', linestyle=None, markerfacecolor='#ff7f0e'),
        ],
        [
            f'|log2FC| > {LOG2_FC_THRESH} and -log10_padj > {NLOG10_PADJ_THRESH:.2f}',
            f'|log2FC| <= {LOG2_FC_THRESH} or -log10_padj <= {NLOG10_PADJ_THRESH:.2f}',
        ],
        frameon=False,
        loc='upper left',
        bbox_to_anchor=(1., 0.5,),
    )


In [None]:
# Create volcano plots of DE-transcripts and -genes with plotly.
#
# One row per contrast; column 1 is transcript-level, column 2 gene-level. Each panel gets
# two traces: features passing both thresholds (blue) and all remaining features (orange).
# Marker size = scale_marker * log2(baseMean); hover text is the feature id (table index).

fig = make_subplots(
    rows=len(contrasts),
    cols=2,
    subplot_titles=[
        title % k for k in contrasts.keys() for title in ('%s Transcript', '%s Gene')
    ],
)

scale_marker = 0.75

# Trace names state the actual classification rule: blue needs BOTH thresholds passed on
# |log2FC|; orange is the complement (either threshold not passed).
significant_name = f'|log2FC| > {LOG2_FC_THRESH} and<br> -log10_padj > {NLOG10_PADJ_THRESH:.2f}'
other_name = f'|log2FC| <= {LOG2_FC_THRESH} or<br> -log10_padj <= {NLOG10_PADJ_THRESH:.2f}'

for i, k in enumerate(contrasts.keys()):

    per_column = (dds.uns['stat_results'][k], dds_gene.uns['stat_results'][k])

    for col, results in enumerate(per_column, start=1):

        # Named so that it does not shadow the builtin `filter`.
        is_significant = (
            (results['log2FoldChange'].abs() > LOG2_FC_THRESH)
            & (results['-log10_padj'] > NLOG10_PADJ_THRESH)
        )

        trace_specs = (
            (is_significant, '#1f77b4', significant_name),
            (~is_significant, '#ff7f0e', other_name),
        )

        for mask, color, name in trace_specs:
            subset = results[mask]
            fig.add_trace(
                go.Scatter(
                    x=subset['log2FoldChange'],
                    y=subset['-log10_padj'],
                    mode='markers',
                    marker_size=scale_marker*np.log2(subset['baseMean']),
                    marker_color=color,
                    text=subset.index,
                    opacity=0.5,
                    name=name,
                    # Only the gene-level column contributes legend entries, so each
                    # class appears once per contrast rather than twice.
                    showlegend=(col == 2),
                ),
                row=i+1,
                col=col,
            )

        # 'log2 FC' (not 'log FC'): the plotted column is log2FoldChange.
        fig.update_xaxes(title_text='log2 FC', row=i+1, col=col)
        fig.update_yaxes(title_text='-log10 padj', row=i+1, col=col)


fig.update_layout(
    height=500*len(contrasts),
    width=1000,
    plot_bgcolor='white',
    legend={'itemsizing': 'constant'},
)

# Identical frame/tick styling for the x and y axes of every subplot.
axis_style = dict(
    showline=True,
    showticklabels=True,
    linecolor='black',
    linewidth=1.2,
    ticks='outside',
    mirror='allticks',
    tickwidth=2.4,
    tickcolor='black',
)

fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

iplot(fig)

### DE2.2 MA plots for all contrasts.

In [None]:
# Create MA plots of DE-transcripts and -genes.
#
# One row per contrast; the left column is transcript-level, the right column gene-level.
# x = log2(baseMean), y = log2 fold change, marker area = -log10(padj).
# Blue marks features with |log2 FC| > LOG2_FC_THRESH and -log10(padj) > NLOG10_PADJ_THRESH;
# everything else (including rows where either value is NaN) is orange.

fig, ax = plt.subplots(len(contrasts), 2, figsize=(10, 5*len(contrasts)))

# With a single contrast plt.subplots returns a 1-D array; force (row, col) indexing.
ax = ax.reshape((-1, 2,))

for i, k in enumerate(contrasts.keys()):

    # (results table, title template) for the two columns of this row.
    panels = (
        (dds.uns['stat_results'][k], '%s Transcript'),
        (dds_gene.uns['stat_results'][k], '%s Gene'),
    )

    for col, (results, title) in enumerate(panels):

        # Vectorised threshold test; replaces a per-row Python loop over iterrows().
        is_significant = (
            (results['log2FoldChange'].abs() > LOG2_FC_THRESH)
            & (results['-log10_padj'] > NLOG10_PADJ_THRESH)
        )

        ax[i, col].scatter(
            np.log2(results['baseMean']),
            results['log2FoldChange'],
            alpha=0.1,
            s=results['-log10_padj'],
            c=np.where(is_significant, '#1f77b4', '#ff7f0e').tolist(),
        )

        ax[i, col].set_xlabel('log2 mean expression')
        ax[i, col].set_title(title % k)

    ax[i, 0].set_ylabel('log2 FC expression')

    # Four reference sizes from 1 up to the gene-level maximum -log10(padj), rounded to
    # the nearest multiple of 5.
    element_range = np.rint(np.linspace(
        1,
        5*round(max(dds_gene.uns['stat_results'][k]['-log10_padj'])/5),
        4,
    ))

    # scatter's `s` is an area while Line2D's markersize is a diameter, hence the sqrt.
    legend_elements = [
        lines.Line2D(
            [0],
            [0],
            lw=0,
            marker="o",
            linestyle=None,
            markersize=s**0.5,
        )
        for s in element_range
    ]

    legend = ax[i, 1].legend(
        legend_elements,
        element_range,
        frameon=False,
        loc='upper left',
        bbox_to_anchor=(1., 1.),
        title='-log10_padj',
    )
    # Re-add the size legend as an artist so the colour legend below does not replace it.
    ax[i, 1].add_artist(legend)

    # Labels state the actual classification rule: blue needs BOTH thresholds passed on
    # |log2FC|; orange is the complement (either threshold not passed).
    color_legend = ax[i, 1].legend(
        [
            lines.Line2D([0], [0], lw=0, marker='o', linestyle=None, markerfacecolor='#1f77b4'),
            lines.Line2D([0], [0], lw=0, marker='o', linestyle=None, markerfacecolor='#ff7f0e'),
        ],
        [
            f'|log2FC| > {LOG2_FC_THRESH} and -log10_padj > {NLOG10_PADJ_THRESH:.2f}',
            f'|log2FC| <= {LOG2_FC_THRESH} or -log10_padj <= {NLOG10_PADJ_THRESH:.2f}',
        ],
        frameon=False,
        loc='upper left',
        bbox_to_anchor=(1., 0.5,),
    )

In [None]:
# Create MA plots of DE-transcripts and -genes with plotly.
#
# One row per contrast; column 1 is transcript-level, column 2 gene-level. Each panel gets
# two traces: features passing both thresholds (blue) and all remaining features (orange).
# x = log2(baseMean), y = log2 fold change, marker size = scale_marker * -log10(padj);
# hover text is the feature id (table index).

fig = make_subplots(
    rows=len(contrasts),
    cols=2,
    subplot_titles=[
        title % k for k in contrasts.keys() for title in ('%s Transcript', '%s Gene')
    ],
)

scale_marker = 2.5

# Trace names state the actual classification rule: blue needs BOTH thresholds passed on
# |log2FC|; orange is the complement (either threshold not passed).
significant_name = f'|log2FC| > {LOG2_FC_THRESH} and<br> -log10_padj > {NLOG10_PADJ_THRESH:.2f}'
other_name = f'|log2FC| <= {LOG2_FC_THRESH} or<br> -log10_padj <= {NLOG10_PADJ_THRESH:.2f}'

for i, k in enumerate(contrasts.keys()):

    per_column = (dds.uns['stat_results'][k], dds_gene.uns['stat_results'][k])

    for col, results in enumerate(per_column, start=1):

        # Named so that it does not shadow the builtin `filter`.
        is_significant = (
            (results['log2FoldChange'].abs() > LOG2_FC_THRESH)
            & (results['-log10_padj'] > NLOG10_PADJ_THRESH)
        )

        trace_specs = (
            (is_significant, '#1f77b4', significant_name),
            (~is_significant, '#ff7f0e', other_name),
        )

        for mask, color, name in trace_specs:
            subset = results[mask]
            fig.add_trace(
                go.Scatter(
                    x=np.log2(subset['baseMean']),
                    y=subset['log2FoldChange'],
                    mode='markers',
                    marker_size=scale_marker*subset['-log10_padj'],
                    marker_color=color,
                    text=subset.index,
                    opacity=0.5,
                    name=name,
                    # Only the gene-level column contributes legend entries, so each
                    # class appears once per contrast rather than twice.
                    showlegend=(col == 2),
                ),
                row=i+1,
                col=col,
            )

        fig.update_xaxes(title_text='log2 mean expression', row=i+1, col=col)
        fig.update_yaxes(title_text='log2 FC', row=i+1, col=col)


fig.update_layout(
    height=500*len(contrasts),
    width=1000,
    plot_bgcolor='white',
    legend={'itemsizing': 'constant'},
)

# Identical frame/tick styling for the x and y axes of every subplot.
axis_style = dict(
    showline=True,
    showticklabels=True,
    linecolor='black',
    linewidth=1.2,
    ticks='outside',
    mirror='allticks',
    tickwidth=2.4,
    tickcolor='black',
)

fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

iplot(fig)

### DE2.3 DE genes/transcripts for each contrast, filtered by log2fc and padj thresholds.

In [None]:
# For every contrast, print how many transcripts and genes pass both thresholds,
# followed by the passing rows ordered by log2FoldChange.

for k in contrasts:

    tables = (
        ('%s Transcript: %s', dds.uns['stat_results'][k]),
        ('%s Gene: %s', dds_gene.uns['stat_results'][k]),
    )

    for header, results in tables:
        passes_lfc = abs(results['log2FoldChange']) > LOG2_FC_THRESH
        passes_padj = results['-log10_padj'] > NLOG10_PADJ_THRESH

        markers = results[passes_lfc & passes_padj]

        print(header % (k, len(markers)))
        print(markers.sort_values('log2FoldChange', axis=0))


In [None]:
# Run LFC calculations on raw inputs as gut-check for issues with pydeseq2 LFC calculations.
#
# Each contrast v is indexed as v[0] = obs column, v[1] = level in the numerator,
# v[2] = reference level in the denominator. For both dds and dds_gene the raw estimate
#     log2( mean of X over v[1] samples / mean of X over v[2] samples )
# is written to varm['LFC_reflevel_<v[2]>_raw'] under the column '<v[0]>_<v[1]>_vs_<v[2]>'.
# A group mean of zero yields +/-inf or NaN in that column.

for adata in (dds, dds_gene):

    # Seed every raw table exactly once per reference level, before any contrast column is
    # filled in. Copying inside the contrast loop would reset the table each time and
    # overwrite raw columns already computed for earlier contrasts that share the same
    # reference level with the squeezed values. dict.fromkeys de-duplicates the levels
    # while keeping their first-seen order.
    for ref_level in dict.fromkeys(v[2] for v in contrasts.values()):
        adata.varm['LFC_reflevel_%s_raw' % ref_level] = adata.varm['LFC_reflevel_%s' % ref_level].copy()

    for v in contrasts.values():
        factor, test_level, ref_level = v[0], v[1], v[2]

        # Per-feature means over the samples belonging to each level of the factor.
        test_mean = np.mean(adata.X[adata.obs[factor] == test_level], axis=0)
        ref_mean = np.mean(adata.X[adata.obs[factor] == ref_level], axis=0)

        adata.varm['LFC_reflevel_%s_raw' % ref_level]['%s_%s_vs_%s' % tuple(v)] = np.log2(test_mean / ref_mean)
                                                


### DE2.4 Plot raw log2fc vs. squeezed log2fc for each contrast (QC issues with pydeseq2 log2fc calculations).

In [None]:
# Scatter the squeezed LFC values (x) against the raw LFC values (y) for every contrast;
# left column is transcript-level, right column gene-level.

fig, ax = plt.subplots(len(contrasts), 2, figsize=(10, 5*len(contrasts)))
ax = ax.reshape((-1, 2,))

for row, (name, spec) in enumerate(contrasts.items()):

    # Both tables are keyed by the reference level; the column is the contrast itself.
    squeezed_key = 'LFC_reflevel_%s' % spec[2]
    raw_key = 'LFC_reflevel_%s_raw' % spec[2]
    column = '%s_%s_vs_%s' % tuple(spec)

    for col, (adata, title) in enumerate(((dds, '%s Transcript'), (dds_gene, '%s Gene'))):
        panel = ax[row, col]
        panel.scatter(
            adata.varm[squeezed_key][column],
            adata.varm[raw_key][column],
            alpha=0.1,
            s=0.1,
        )
        panel.set_xlabel('log2 FC squeezed')
        panel.set_title(title % name)

    # Only the left panel carries the shared y-axis label.
    ax[row, 0].set_ylabel('log2 FC raw')



### DE2.5 Plot log2fc gene vs. log2fc transcript.

In [None]:
# Attach each transcript's gene id to the transcript-level stat_results tables (new
# 'gene_id' column), join them with the gene-level results, and plot transcript log2 FC
# (x) against gene log2 FC (y) for every contrast.
# Marker area = scale_marker * transcript -log10(padj).

gene_transcript_mapping = dict(zip(
    dds.uns['gene_transcript_mapping']['tx'],
    dds.uns['gene_transcript_mapping']['gene_id'],
))

# squeeze=False always yields a 2-D (rows, 1) array of Axes, also for a single contrast,
# so no type check or reshape is needed.
fig, ax = plt.subplots(len(contrasts), 1, figsize=(5, 5*len(contrasts)), squeeze=False)

scale_marker = 5

for i, k in enumerate(contrasts.keys()):

    # Direct dict lookup on purpose: a transcript missing from the mapping raises KeyError.
    dds.uns['stat_results'][k]['gene_id'] = dds.uns['stat_results'][k].index.map(lambda x: gene_transcript_mapping[x])

    # Columns present in both tables get the suffix _x (transcript) or _y (gene).
    df = dds.uns['stat_results'][k].merge(dds_gene.uns['stat_results'][k], left_on='gene_id', right_index=True)

    ax[i, 0].scatter(
        df['log2FoldChange_x'],
        df['log2FoldChange_y'],
        alpha=0.05,
        s=scale_marker*df['-log10_padj_x'],
    )

    ax[i, 0].set_xlabel('log2 FC transcript')
    ax[i, 0].set_ylabel('log2 FC gene')

    ax[i, 0].set_title('%s Transcript v Gene logFC' % k)

    # Reference sizes span the plotted transcript-level values (the legend title and the
    # marker sizes are transcript-level), from 1 up to the maximum rounded to the nearest
    # multiple of 5.
    element_range = np.rint(np.linspace(
        1,
        5*round(df['-log10_padj_x'].max()/5),
        4,
    ))

    # scatter's `s` is an area while Line2D's markersize is a diameter, hence the sqrt.
    # The same scale_marker factor as the scatter call is applied so that legend markers
    # match the plotted marker sizes.
    legend_elements = [
        lines.Line2D(
            [0],
            [0],
            lw=0,
            marker="o",
            linestyle=None,
            markersize=(scale_marker*s)**0.5,
        )
        for s in element_range
    ]

    legend = ax[i, 0].legend(
        legend_elements,
        element_range,
        frameon=False,
        loc='upper left',
        bbox_to_anchor=(1., 1.),
        title='-log10_padj transcript',
    )


In [None]:
# Plotly version of the transcript-vs-gene log2 FC comparison: add a 'gene_id' column to
# each transcript-level stat_results table, join it with the gene-level results and draw
# one scatter panel per contrast.

gene_transcript_mapping = dict(zip(
    dds.uns['gene_transcript_mapping']['tx'],
    dds.uns['gene_transcript_mapping']['gene_id'],
))

fig = make_subplots(
    rows=len(contrasts),
    cols=1,
    subplot_titles=['Gene_vs_Transcript_%s' % k for k in contrasts],
)

scale_marker = 5

for row, k in enumerate(contrasts, start=1):

    transcript_results = dds.uns['stat_results'][k]
    transcript_results['gene_id'] = transcript_results.index.map(lambda x: gene_transcript_mapping[x])

    # Columns present in both tables get the suffix _x (transcript) or _y (gene).
    df = transcript_results.merge(
        dds_gene.uns['stat_results'][k],
        left_on='gene_id',
        right_index=True,
    )

    points = go.Scatter(
        x=df['log2FoldChange_x'],
        y=df['log2FoldChange_y'],
        mode='markers',
        marker_size=scale_marker*df['-log10_padj_x'],
        # Hover text pairs the transcript id (index) with its gene id.
        text=list(zip(df.index, df['gene_id'])),
        opacity=0.5,
        showlegend=False,
    )
    fig.add_trace(points, row=row, col=1)

    fig.update_xaxes(title_text='log2 FC transcript', row=row, col=1)
    fig.update_yaxes(title_text='log2 FC gene', row=row, col=1)

fig.update_layout(
    height=500*len(contrasts),
    width=500,
    plot_bgcolor='white',
)

# Identical frame/tick styling for the x and y axes of every subplot.
axis_style = dict(
    showline=True,
    showticklabels=True,
    linecolor='black',
    linewidth=1.2,
    ticks='outside',
    mirror='allticks',
    tickwidth=2.4,
    tickcolor='black',
)

fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

iplot(fig)



### DE2.6 plot log2fc of all pairs of contrasts.

In [None]:
# Plot log2 FC against log2 FC for every pair of different contrasts; left column is
# transcript-level, right column gene-level. Nothing is drawn with fewer than two contrasts.

contrast_combinations = list(combinations(contrasts.keys(), 2))

if contrast_combinations:

    fig, ax = plt.subplots(
        len(contrast_combinations),
        2,
        figsize=(10, 5*len(contrast_combinations)),
    )
    ax = ax.reshape((-1, 2,))

    for i, (k_1, k_2) in enumerate(contrast_combinations):

        for col, (level, adata) in enumerate((('transcript', dds), ('gene', dds_gene))):
            stats = adata.uns['stat_results']

            ax[i, col].scatter(
                stats[k_1]['log2FoldChange'],
                stats[k_2]['log2FoldChange'],
                s=1,
                alpha=0.1,
            )

            ax[i, col].set_xlabel('log2 FC %s %s' % (level, k_1))
            ax[i, col].set_ylabel('log2 FC %s %s' % (level, k_2))


In [None]:
# Calculate TPM from normalized counts and store it in the 'normed_tpm' layer of both
# objects: divide normed_counts by var['length'], then scale every sample (row) so that
# it sums to 1e6. The per-sample sums are kept in obs['sample_sums'].

for adata in (dds, dds_gene):

    # Counts divided by feature length; lengths are broadcast across samples as a row vector.
    per_length = adata.layers['normed_counts'] / np.reshape(adata.var['length'], (1, -1))

    adata.obs['sample_sums'] = per_length.sum(axis=1)

    # Sample sums are broadcast across features as a column vector.
    adata.layers['normed_tpm'] = (per_length / np.reshape(adata.obs['sample_sums'], (-1, 1)))*1e6

In [None]:
# Write dds objects to files for gsea analysis.

annotated = ((dds, DDS_TRANSCRIPT_FH), (dds_gene, DDS_GENE_FH))

# Pydeseq2 supports trend_coeffs/replaced as either np.array or pd.series, np.array required for
# saving h5-formatted AnnData objects.
for adata, _ in annotated:
    adata.uns['trend_coeffs'] = np.array(adata.uns['trend_coeffs'])

for adata, _ in annotated:
    adata.varm['replaced'] = np.array(adata.varm['replaced'])

# DeseqDataSet doesn't have native support for writing h5, save as AnnData objects and restore from
# AnnData objects. The targets are the same paths the objects were read from.
for adata, path in annotated:
    adata.write(path)