# 1D Variability hypothesis testing for HBEC IFN experiment

In [11]:
import scanpy as sc
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from pybedtools import BedTool
import pickle as pkl
%matplotlib inline

In [12]:
import sys
sys.path.append('/home/ssm-user/Github/scrna-parameter-estimation/dist/memento-0.0.6-py3.8.egg')
sys.path.append('/home/ssm-user/Github/misc-seq/miscseq/')
import encode
import memento

In [13]:
# Root directory for this analysis: the input .h5ad is read from here and the
# per-run results are written under its 'binary_test_latest/' subdirectory.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data_path = '/data_volume/memento/hbec/'

### Read the processed RNA data

Focus on the club and bc/club cells and type I interferons for now.

Timestamps are encoded to integers later, inside each memento run (the `time_step` column).

In [20]:
# Load the AnnData object that every section below subsets and tests.
# NOTE(review): the file name suggests filtered raw counts — confirm upstream.
adata = sc.read(data_path + 'HBEC_type_I_filtered_counts_deep.h5ad')

In [21]:
# Full cell-type label (as found in adata.obs['cell_type']) -> short code
# used in adata.obs['ct'] and in output file names.
converter = {
    'basal/club': 'BC',
    'basal': 'B',
    'ciliated': 'C',
    'goblet': 'G',
    'ionocyte/tuft': 'IT',
    'neuroendo': 'N',
}

In [22]:
# Add the short cell-type code for every cell; a label missing from
# `converter` raises KeyError rather than being silently dropped.
adata.obs['ct'] = adata.obs['cell_type'].apply(converter.__getitem__)

### Setup memento

In [23]:
def assign_q(batch):
    """Return the ``q`` value to use for cells from ``batch``.

    Batches 0, 1 and 2 each have their own factor; any other value falls
    back to the last factor. Every factor is multiplied by 0.25.

    NOTE(review): the factors and the 0.25 multiplier are undocumented magic
    numbers — presumably per-batch capture/sampling rates; confirm the source.
    """
    per_batch_factor = ((0, 0.387), (1, 0.392), (2, 0.436))

    for batch_id, factor in per_batch_factor:
        if batch == batch_id:
            return factor*0.25

    # Fallback for every batch id not listed above.
    return 0.417*0.25

In [24]:
# Per-cell q value looked up from the cell's batch; this column is the one
# handed to memento.setup_memento via q_column='q'.
adata.obs['q'] = adata.obs['batch'].apply(assign_q)

In [26]:
# Initialise memento on the full dataset, using the per-cell 'q' column.
# NOTE(review): trim_percent=0.05 is passed through unchanged — see the memento
# docs for its exact meaning.
memento.setup_memento(adata, q_column='q', trim_percent=0.05)

Version 0.0.6


### Run memento for each subset, comparing to control, separate cell types

In [16]:
# Cell-type codes (values of adata.obs['ct']) to test.
cts = ['C', 'B', 'BC']
# Time points, kept as strings because adata.obs['time'] is filtered with strings.
tps = [str(hours) for hours in (3, 6, 9, 24, 48)]
# Stimulation conditions (values of adata.obs['stim'] other than 'control').
stims = ['alpha', 'beta', 'gamma', 'lambda']

In [17]:
import os
# Snapshot of result files already present in the output directory; the loop
# below skips any run whose output name appears here. The listing is taken
# once, so files created afterwards are not reflected until this cell is re-run.
done_files = os.listdir(data_path + 'binary_test_latest/')

In [None]:
# One test per (cell type, time point, stimulation): cells of that type from
# the 'control' and `stim` conditions at time '0' or `tp` are compared via a
# binary `time_step` covariate (0 for time 0, 1 for `tp`).
for ct in cts:
    for tp in tps:
        for stim in stims:

            # The same name is used for the skip check and for writing. The
            # earlier '-'.join(ct) turned 'BC' into 'B-C', which never matched
            # the written 'BC_...' file, so finished BC runs were recomputed.
            fname = '{}_{}_{}.h5ad'.format(ct, stim, tp)

            if fname in done_files:
                print('Skipping', fname)
                continue

            print('starting', ct, tp, stim)

            # Subset first and copy only the selected cells; copying the whole
            # dataset before indexing is unnecessary work on every iteration.
            keep = (
                adata.obs.ct.isin([ct])
                & adata.obs.stim.isin(['control', stim])
                & adata.obs.time.isin(['0', tp])
            )
            adata_stim = adata[keep, :].copy()

            # Encode the two time points present in the subset as 0/1.
            time_converter = {0: 0, int(tp): 1}
            adata_stim.obs['time_step'] = adata_stim.obs['time'].astype(int).apply(lambda x: time_converter[x])

            memento.create_groups(adata_stim, label_columns=['time_step', 'donor'])
            memento.compute_1d_moments(adata_stim, min_perc_group=.9)

            # `time_step` is the tested covariate; `donor` is an extra term in the formula.
            memento.ht_1d_moments(
                adata_stim,
                formula_like='1 + time_step + donor',
                treatment_col='time_step',
                num_boot=10000,
                verbose=1,
                num_cpus=93,
                resampling='permutation',
                approx=True)

            adata_stim.write(data_path + 'binary_test_latest/' + fname)

### Run memento for each subset, but stratify cell types

In [10]:
# Cell-type codes (values of adata.obs['ct']) pooled in the stratified test.
cts = ['C', 'B', 'BC']
# Time points, kept as strings because adata.obs['time'] is filtered with strings.
tps = [str(hours) for hours in (3, 6, 9, 24, 48)]
# Stimulation conditions (values of adata.obs['stim'] other than 'control').
stims = ['alpha', 'beta', 'gamma', 'lambda']

In [11]:
import os
# Snapshot of result files already present in the output directory; the loop
# below skips any run whose output name appears here. The listing is taken
# once, so files created afterwards are not reflected until this cell is re-run.
done_files = os.listdir(data_path + 'binary_test_latest/')

In [None]:
# One test per (time point, stimulation), pooling every cell type in `cts` and
# adding `ct` both to the grouping and to the formula.
for tp in tps:
    for stim in stims:

        # Single source of truth for the output name (skip check and write).
        fname = 'all_ct_{}_{}.h5ad'.format(stim, tp)

        if fname in done_files:
            print('Skipping', fname)
            continue

        print('starting', tp, stim)

        # Subset first and copy only the selected cells; copying the whole
        # dataset before indexing is unnecessary work on every iteration.
        keep = (
            adata.obs.ct.isin(cts)
            & adata.obs.stim.isin(['control', stim])
            & adata.obs.time.isin(['0', tp])
        )
        adata_stim = adata[keep, :].copy()

        # Encode the two time points present in the subset as 0/1.
        time_converter = {0: 0, int(tp): 1}
        adata_stim.obs['time_step'] = adata_stim.obs['time'].astype(int).apply(lambda x: time_converter[x])

        memento.create_groups(adata_stim, label_columns=['time_step', 'donor', 'ct'])
        # Lower min_perc_group (.3) than the per-cell-type runs (.9).
        memento.compute_1d_moments(adata_stim, min_perc_group=.3)

        # `time_step` is the tested covariate; `donor` and `ct` are extra formula terms.
        memento.ht_1d_moments(
            adata_stim,
            formula_like='1 + time_step + donor + ct',
            treatment_col='time_step',
            num_boot=10000,
            verbose=1,
            num_cpus=93,
            resampling='permutation',
            approx=True)

        adata_stim.write(data_path + 'binary_test_latest/' + fname)

### Run memento comparing 3 hr to 48 hr, stratified by cell type

In [13]:
# Cell-type codes (values of adata.obs['ct']) pooled in the 3-vs-48 test.
cts = ['C', 'B', 'BC']
# Time points as strings; the loop in this section hard-codes '3' and '48'
# and does not read this list.
tps = [str(hours) for hours in (3, 6, 9, 24, 48)]
# Stimulation conditions (values of adata.obs['stim'] other than 'control').
stims = ['alpha', 'beta', 'gamma', 'lambda']

In [14]:
import os
# Snapshot of result files already present in the output directory; the loop
# below skips any run whose output name appears here. The listing is taken
# once, so files created afterwards are not reflected until this cell is re-run.
done_files = os.listdir(data_path + 'binary_test_latest/')

In [None]:
# Time points 3 and 48 are encoded as 0 and 1; the mapping does not depend on
# the stimulation, so it is built once.
time_converter = {3: 0, 48: 1}

# One test per stimulation, pooling every cell type in `cts` and comparing
# time '3' against time '48'.
for stim in stims:

    # Single source of truth for the output name (skip check and write).
    fname = 'all_ct_{}_3_vs_48.h5ad'.format(stim)

    if fname in done_files:
        print('Skipping', fname)
        continue

    print('starting', stim)

    # Subset first and copy only the selected cells; copying the whole
    # dataset before indexing is unnecessary work on every iteration.
    # NOTE(review): 'control' is still allowed by the stim filter, so any
    # control cells recorded at time '3' or '48' would be included — confirm
    # this is intended.
    keep = (
        adata.obs.ct.isin(cts)
        & adata.obs.stim.isin(['control', stim])
        & adata.obs.time.isin(['3', '48'])
    )
    adata_stim = adata[keep, :].copy()

    adata_stim.obs['time_step'] = adata_stim.obs['time'].astype(int).apply(lambda x: time_converter[x])

    memento.create_groups(adata_stim, label_columns=['time_step', 'donor', 'ct'])
    memento.compute_1d_moments(adata_stim, min_perc_group=.3)

    # `time_step` is the tested covariate; `donor` and `ct` are extra formula terms.
    memento.ht_1d_moments(
        adata_stim,
        formula_like='1 + time_step + donor + ct',
        treatment_col='time_step',
        num_boot=10000,
        verbose=1,
        num_cpus=93,
        resampling='permutation',
        approx=True)

    adata_stim.write(data_path + 'binary_test_latest/' + fname)

### Run memento within stim for each cell type, using time as ordinal

In [27]:
# Cell-type codes (values of adata.obs['ct']) to test.
cts = ['C', 'B', 'BC']
# Time points as strings; the loop in this section maps the integer times
# 3, 6, 9, 24, 48 to ordinal steps itself and does not read this list.
tps = [str(hours) for hours in (3, 6, 9, 24, 48)]
# Stimulation conditions (values of adata.obs['stim'] other than 'control').
stims = ['alpha', 'beta', 'gamma', 'lambda']

In [28]:
import os
# Snapshot of result files already present in the output directory; the loop
# below skips any run whose output name appears here. The listing is taken
# once, so files created afterwards are not reflected until this cell is re-run.
done_files = os.listdir(data_path + 'binary_test_latest/')

In [29]:
# Ordinal encoding of the time points (3 -> 0, ..., 48 -> 4). It is the same
# for every run, so it is built once; any other time value raises KeyError.
time_converter = {3: 0, 6: 1, 9: 2, 24: 3, 48: 4}

# One test per (cell type, stimulation): only cells of that type under that
# stimulation are kept, and `time_step` is tested as an ordinal covariate.
for ct in cts:
    for stim in stims:

        # The same name is used for the skip check and for writing. The
        # earlier '-'.join(ct) turned 'BC' into 'B-C', which never matched
        # the written 'BC_..._stim.h5ad' file, so finished BC runs were
        # recomputed.
        fname = '{}_{}_stim.h5ad'.format(ct, stim)

        if fname in done_files:
            print('Skipping', fname)
            continue

        print('starting', ct, stim)

        # Subset first and copy only the selected cells; copying the whole
        # dataset before indexing is unnecessary work on every iteration.
        keep = adata.obs.ct.isin([ct]) & adata.obs.stim.isin([stim])
        adata_stim = adata[keep, :].copy()

        adata_stim.obs['time_step'] = adata_stim.obs['time'].astype(int).apply(lambda x: time_converter[x])

        memento.create_groups(adata_stim, label_columns=['time_step', 'donor'])
        memento.compute_1d_moments(adata_stim, min_perc_group=.9)

        # `time_step` is the tested covariate; `donor` is an extra term in the formula.
        memento.ht_1d_moments(
            adata_stim,
            formula_like='1 + time_step + donor',
            treatment_col='time_step',
            num_boot=10000,
            verbose=1,
            num_cpus=93,
            resampling='permutation',
            approx=True)

        adata_stim.write(data_path + 'binary_test_latest/' + fname)

starting C alpha


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    4.5s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    8.7s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:   15.4s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   23.1s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   32.3s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   43.3s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   56.5s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.5min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.8min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  2.1min
[Parallel(n_jobs=93)]: Done 7771 out of 7771 | elapsed:  2.5min finished
... storing 'memento_group' as categorical


starting C beta


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.8s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    4.2s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    9.8s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   16.4s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   24.0s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   33.5s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   44.4s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   57.0s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.5min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  1.7min
[Parallel(n_jobs=93)]: Done 8264 tasks      | elapsed:  2.0min
[Parallel(n_jobs=93)]: Done 8751 out of 8751 | elapsed:  2.3min finished
... storing 'memento_group' as categorical


starting C gamma


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    1.0s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    5.1s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:   11.9s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   19.8s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   29.1s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   40.5s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   54.1s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.4min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.8min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  2.1min
[Parallel(n_jobs=93)]: Done 8412 out of 8412 | elapsed:  2.7min finished
... storing 'memento_group' as categorical


starting C lambda


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.9s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    4.9s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:   11.1s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   18.6s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   27.3s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   38.3s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   50.2s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:  1.1min
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.3min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.6min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  2.0min
[Parallel(n_jobs=93)]: Done 8264 tasks      | elapsed:  2.3min
[Parallel(n_jobs=93)]: Done 8728 out of 8728 | elapsed:  2.6min finished
... storing 'memento_group' as categorical


starting B alpha


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.5s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    7.8s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   12.9s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   19.1s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   26.6s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   35.2s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   45.4s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:   56.7s
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 6314 out of 6314 | elapsed:  1.3min finished
... storing 'memento_group' as categorical


starting B beta


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    4.0s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    9.0s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   15.1s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   22.6s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   31.4s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   42.5s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   54.7s
[Parallel(n_jobs=93)]: Done 4313 out of 4313 | elapsed:  1.1min finished
... storing 'memento_group' as categorical


starting B gamma


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.6s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    8.3s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   13.7s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   20.3s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   28.3s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   37.9s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   48.4s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.0min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 6953 out of 6953 | elapsed:  1.6min finished
... storing 'memento_group' as categorical


starting B lambda


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.4s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    7.6s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   12.7s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   18.9s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   26.1s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   34.6s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   44.3s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:   55.4s
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.1min
[Parallel(n_jobs=93)]: Done 6416 out of 6416 | elapsed:  1.3min finished
... storing 'memento_group' as categorical


starting BC alpha


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.5s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    8.0s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   13.5s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   20.0s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   27.9s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   37.3s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   47.6s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:   59.8s
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  1.5min
[Parallel(n_jobs=93)]: Done 7302 out of 7302 | elapsed:  1.6min finished
... storing 'memento_group' as categorical


starting BC beta


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.8s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    4.1s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    9.5s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   16.1s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   23.7s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   33.2s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   44.4s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   56.6s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.4min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  1.7min
[Parallel(n_jobs=93)]: Done 7256 out of 7256 | elapsed:  1.9min finished
... storing 'memento_group' as categorical


starting BC gamma


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.6s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    8.3s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   14.1s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   20.6s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   28.7s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   38.7s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   49.4s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.0min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.3min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  1.5min
[Parallel(n_jobs=93)]: Done 7430 out of 7430 | elapsed:  1.7min finished
... storing 'memento_group' as categorical


starting BC lambda


  res = method(*args, **kwargs)
[Parallel(n_jobs=93)]: Using backend LokyBackend with 93 concurrent workers.
[Parallel(n_jobs=93)]: Done  14 tasks      | elapsed:    0.7s
[Parallel(n_jobs=93)]: Done 264 tasks      | elapsed:    3.5s
[Parallel(n_jobs=93)]: Done 614 tasks      | elapsed:    8.0s
[Parallel(n_jobs=93)]: Done 1064 tasks      | elapsed:   13.5s
[Parallel(n_jobs=93)]: Done 1614 tasks      | elapsed:   20.0s
[Parallel(n_jobs=93)]: Done 2264 tasks      | elapsed:   27.9s
[Parallel(n_jobs=93)]: Done 3014 tasks      | elapsed:   37.6s
[Parallel(n_jobs=93)]: Done 3864 tasks      | elapsed:   47.9s
[Parallel(n_jobs=93)]: Done 4814 tasks      | elapsed:  1.0min
[Parallel(n_jobs=93)]: Done 5864 tasks      | elapsed:  1.2min
[Parallel(n_jobs=93)]: Done 7014 tasks      | elapsed:  1.5min
[Parallel(n_jobs=93)]: Done 7545 out of 7545 | elapsed:  1.7min finished
... storing 'memento_group' as categorical
