# 1D Variability analysis for HBEC IFN experiment

In [1]:
import scanpy as sc
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from pybedtools import BedTool
import pickle as pkl
%matplotlib inline


Bad key "text.kerning_factor" on line 4 in
/data/home/anaconda3/envs/single_cell/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
import sys
# Make two local checkouts importable: the packaged memento 0.0.4 egg and the
# miscseq helper directory. Both are absolute paths on the analysis machine.
sys.path.append('/data/home/Github/scrna-parameter-estimation/dist/memento-0.0.4-py3.7.egg')
sys.path.append('/data/home/Github/misc-seq/miscseq/')
# NOTE(review): `encode` is presumably resolved from the miscseq directory
# appended above; it is not referenced in the cells shown here — confirm.
import encode
import memento

In [3]:
# Root directory of the HBEC IFN data; the read path below is built by
# string concatenation, so the trailing slash is required.
data_path = '/data_volume/ifn_hbec/'

### Read the processed RNA data

Focus on the club and bc/club cells and type I interferons for now.

Encode the timestamps to integers.

In [4]:
# Load the annotated count matrix from the .h5ad file under data_path.
adata = sc.read(data_path + 'HBEC_type_I_filtered_counts_deep.h5ad')

In [5]:
# Drop every gene whose name starts with 'MT-' and materialise the result
# as an independent object rather than a view.
is_mt_gene = adata.var.index.str.startswith('MT-')
adata = adata[:, ~is_mt_gene].copy()

# Shorten the two compound cell-type labels, one after the other; every other
# label is passed through unchanged.
for long_label, short_label in (('basal/club', 'bc'), ('ionocyte/tuft', 'ion-tuft')):
    adata.obs['cell_type'] = adata.obs['cell_type'].apply(
        lambda label: short_label if label == long_label else label)

In [6]:
# Report the dimensions of adata (AnnData shape is n_obs x n_vars).
adata.shape

(69958, 36588)

### Setup memento

In [7]:
def assign_q(batch):
    """Return the ``q`` value assigned to cells from ``batch``.

    Each known batch (0, 1, 2) has its own rate; any other value falls back
    to the rate 0.417. The rate is multiplied by 0.25 before being returned.

    NOTE(review): ``q`` is presumably the capture/sampling rate that memento
    expects and 0.25 an extra scaling factor; neither meaning is visible
    here — confirm.
    """
    known_rates = ((0, 0.387), (1, 0.392), (2, 0.436))

    for known_batch, rate in known_rates:
        if batch == known_batch:
            return rate*0.25

    # Anything that is not batch 0, 1 or 2 gets the last rate.
    return 0.417*0.25

In [8]:
# Store each cell's q value, looked up from its batch via assign_q, in obs['q'].
adata.obs['q'] = adata.obs['batch'].apply(assign_q)

In [9]:
# Initialise memento on adata, telling it to read the per-cell q from obs['q'].
# NOTE(review): the obs preview in the next cell contains a
# 'memento_size_factor' column, presumably added by this call — confirm.
memento.setup_memento(adata, q_column='q')

In [10]:
# Preview the first three rows of the per-cell metadata table.
adata.obs.head(3)

Unnamed: 0,NUM.SNPS,BEST.GUESS,DROPLET.TYPE,batch,HTO_classification,condition,donor,stim,time,n_genes,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,leiden,cell_type,q,memento_size_factor
AAACCCAAGGAAGTAG-1,1700,donor0,SNG,0,hash-10,d2513_lambda_9,d2513,lambda,9,4297,4297,12191.0,1394.0,11.434665,3,ciliated,0.10425,0.955289
AAACCCAAGGGACTGT-1,839,donor0,SNG,0,hash-3,d2513_alpha_24,d2513,alpha,24,2171,2171,7270.0,386.0,5.309491,5,bc,0.10425,0.824407
AAACCCACAATCTGCA-1,1130,donor1,SNG,0,hash-1,d2614_alpha_3,d2614,alpha,3,3016,3016,9544.0,516.0,5.406538,0,basal,0.10425,0.988939


### Run memento for each subset, comparing to control

In [11]:
# Cell-type groups to test. Each entry is a list of labels passed to
# obs.cell_type.isin(...); the labels are joined with '-' for the output name.
cts = [['ciliated'], ['bc'], ['basal']]
# Time points, kept as strings: they are matched against obs.time with isin()
# and converted with int() inside the loop.
tps = ['3', '6', '9', '24', '48']
# Stimulation labels; each is tested together with the 'control' cells.
stims = ['alpha', 'beta', 'gamma', 'lambda']

In [12]:
import os

# Directory that receives one .h5ad result file per tested combination.
# Create it up front: on a fresh run it does not exist yet, which would make
# os.listdir raise FileNotFoundError and every later write into it fail.
output_dir = '/data_volume/ifn_hbec/binary_test_deep/'
os.makedirs(output_dir, exist_ok=True)

# File names already present; the loop below skips these combinations.
done_files = os.listdir(output_dir)

In [None]:
# Run the memento 1D moment test for every (cell type, time point, stim)
# combination and write one .h5ad per combination. Combinations whose output
# file is already listed in `done_files` are skipped, so this cell can be
# re-run to resume an interrupted sweep.
for ct in cts:
    for tp in tps:
        for stim in stims:

            fname = '{}_{}_{}.h5ad'.format('-'.join(ct), stim, tp)

            if fname in done_files:
                print('Skipping', fname)
                continue

            try:

                print('starting', ct, tp, stim)

                # Keep cells of the requested type(s) whose stim is 'control'
                # or `stim` and whose time is '0' or `tp`. Subset first and
                # copy only the selected cells, instead of deep-copying the
                # full dataset on every iteration.
                keep_cells = (
                    adata.obs.cell_type.isin(ct)
                    & adata.obs.stim.isin(['control', stim])
                    & adata.obs.time.isin(['0', tp]))
                adata_stim = adata[keep_cells, :].copy()

                # Binary covariate: time 0 -> 0, time `tp` -> 1.
                time_converter = {0: 0, int(tp): 1}
                adata_stim.obs['time_step'] = adata_stim.obs['time'].astype(int).apply(lambda x: time_converter[x])

                memento.create_groups(adata_stim, label_columns=['time_step', 'donor'])
                memento.compute_1d_moments(adata_stim, min_perc_group=.9)

                memento.ht_1d_moments(
                    adata_stim,
                    formula_like='1 + time_step',
                    cov_column='time_step',
                    num_boot=10000,
                    verbose=1,
                    num_cpus=14)

                # NOTE(review): presumably removed because this entry cannot be
                # serialised to .h5ad — confirm.
                del adata_stim.uns['memento']['mv_regressor']

                # Same name as the skip check above, so the two cannot drift apart.
                adata_stim.write('/data_volume/ifn_hbec/binary_test_deep/' + fname)
            except Exception as err:
                # Best effort: report which combination failed and why, then
                # move on. KeyboardInterrupt/SystemExit are deliberately not
                # caught, so interrupting the kernel actually stops the sweep.
                print('failed', fname, repr(err))
                continue