# Single cell methods for cell type comparison in lupus data

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import functools
import numpy as np
import scanpy as sc
import scipy.stats as stats
from statsmodels.stats.multitest import fdrcorrection
from patsy import dmatrix, dmatrices 
import statsmodels.api as sm

import sys
# Make the locally built memento egg importable before importing it.
sys.path.append('/home/ssm-user/Github/scrna-parameter-estimation/dist/memento-0.0.9-py3.8.egg')
import memento
import memento.simulate as simulate

# Directory prefix used for every input and output file in this notebook.
data_path = '/data_volume/memento/method_comparison/lupus/'

# Column names shared by the per-method result tables.
columns = ['logFC', 'PValue', 'FDR']

### Read the data 

In [5]:
# Load the single-cell dataset used by every comparison in this notebook.
adata = sc.read(data_path + 'T4_vs_cM.single_cell.10000.0.h5ad')

# Unique individuals, in order of first appearance. The pseudobulk loops
# (`for ind in inds`) need this name, so it must not stay commented out.
inds = adata.obs.ind.drop_duplicates().tolist()
# cts = adata.obs.cg_cov.drop_duplicates().tolist()

# ### Run the t-test

# def safe_fdr(x):
#     fdr = np.ones(x.shape[0])
#     _, fdr[np.isfinite(x)] = fdrcorrection(x[np.isfinite(x)])
#     return fdr

# ttest_adata = adata.copy()
# sc.pp.normalize_total(ttest_adata)
# sc.pp.log1p(ttest_adata)

# data1 = ttest_adata[ttest_adata.obs['cg_cov'] =='T4'].X.todense()
# data2 = ttest_adata[ttest_adata.obs['cg_cov'] =='cM'].X.todense()

# statistic, pvalue = stats.ttest_ind(data1, data2, axis=0)

# logfc = data1.mean(axis=0) - data2.mean(axis=0)

# ttest_result = pd.DataFrame(
#     zip(logfc.A1, pvalue, safe_fdr(pvalue)), 
#     index=ttest_adata.var.index,
#     columns=columns)
# ttest_result.to_csv(data_path + 'T4_vs_cM.sc.ttest.csv')

Only considering the two last: ['.0', '.h5ad'].
Only considering the two last: ['.0', '.h5ad'].


### Current top implementation

In [7]:
# Work on a copy of the loaded AnnData object.
glm_adata = adata.copy()
# Dispersion table; its 'gene' and 'dispersion' columns are read below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: rebinds `dispersions` from the DataFrame to a list in the same row order as gene_list.
dispersions = dispersions['dispersion'].tolist()

def sample_sum(data):
    """Return the sum of `data` along axis 0 (one total per column)."""
    return data.sum(axis=0)

def scaled_mean_se2(data, q=0.07):
    """Estimate per-column scaled means and their squared standard errors.

    A row of ones is appended to `data`, every row is divided by its own
    total, and the normalised rows are combined with inverse-variance
    weights. Columns whose weighted mean is not finite fall back to the
    plain (unweighted) mean.

    Parameters
    ----------
    data : 2-D ndarray
        Rows are averaged over; presumably cells x genes (confirm with caller).
    q : float, optional
        Constant used in the variance correction terms. Defaults to 0.07,
        the value that was previously hard-coded.

    Returns
    -------
    m : 1-D ndarray
        Weighted mean of the normalised rows, one entry per column.
    se2 : 1-D ndarray
        Corrected variance divided by ``data.shape[0]``. Because a
        correction term is subtracted from the naive variance, entries can
        be zero or negative; callers must guard before inverting them.
    """
    augmented_data = np.append(data, np.ones((1, data.shape[1])), axis=0)

    sf = augmented_data.sum(axis=1)
    X = augmented_data / sf.reshape(-1, 1)

    naive_v = X.var(axis=0)
    naive_m = X.mean(axis=0)
    # Subtract the q-dependent correction from the naive variance.
    v = naive_v - (1 - q) * (X / (sf**2 - sf * (1 - q)).reshape(-1, 1)).mean(axis=0)
    # One variance term per (row, column), used as inverse weights below.
    variance_contributions = ((1 - q) / sf).reshape(-1, 1) * naive_m.reshape(1, -1) + v.reshape(1, -1)
    m = np.average(X, weights=1 / variance_contributions, axis=0)
    not_finite = ~np.isfinite(m)
    m[not_finite] = naive_m[not_finite]

    return m, (v / data.shape[0])

# Build one pseudobulk row per (individual, cell type) pair.
scaled_means = []
weights = []
meta = []
totals = []
for ind in inds:
    for ct in ['cM', 'T4']:

        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        totals.append(data.sum())
        s, se2 = scaled_mean_se2(data)
        scaled_means.append(s)
        # se2 can be zero or negative (a correction term is subtracted from
        # the variance), so 1/se2 is only taken where se2 > 0; every other
        # gene keeps the neutral weight 1 instead of becoming nan/inf.
        w = np.ones(s.shape[0])
        w[se2>0] = 1/se2[se2>0]
        # NOTE(review): statsmodels WLS documents `weights` as proportional
        # to 1/variance; the sqrt used here is kept from the original.
        weights.append(np.sqrt(w))
        meta.append((ind, int(ct=='T4')))
scaled_means = pd.DataFrame(np.vstack(scaled_means), columns=glm_adata.var.index)
weights = pd.DataFrame(np.vstack(weights), columns=glm_adata.var.index)
totals = np.array(totals)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
scaled_means = scaled_means[gene_list]
weights = weights[gene_list]

# weights = weights / weights.mean(axis=0)

design = dmatrix('ct+ind', meta)
totals = scaled_means.sum(axis=1).values

# Per gene: likelihood-ratio test of the full design against the design
# without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
weighted_mean_glm_results = []
for idx in range(len(gene_list)):
    model = sm.WLS(
        np.log(scaled_means.iloc[:, [idx]]),
        design,
        weights=weights.iloc[:, idx])
    res_model = sm.WLS(
        np.log(scaled_means.iloc[:, [idx]]),
        design[:, :-1],
        weights=weights.iloc[:, idx])
    fit = model.fit()
    res_fit = res_model.fit()
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    weighted_mean_glm_results.append((fit.params[-1], pv))
weighted_mean_glm_results = pd.DataFrame(weighted_mean_glm_results, columns=['logFC', 'PValue'], index=gene_list)
_, weighted_mean_glm_results['FDR'] = fdrcorrection(weighted_mean_glm_results['PValue'])

weighted_mean_glm_results.to_csv(data_path + 'T4_vs_cM.sc.weighted_mean_wls.csv')

In [None]:
# This cell originally relied on names (data1, data2, logfc, safe_fdr,
# ttest_adata) that only exist in the commented-out t-test cell; they are
# rebuilt here so that the Mann-Whitney comparison runs on its own.
def safe_fdr(x):
    """Return BH-adjusted p-values; non-finite inputs get an FDR of 1."""
    x = np.asarray(x, dtype=float)
    fdr = np.ones(x.shape[0])
    finite = np.isfinite(x)
    _, fdr[finite] = fdrcorrection(x[finite])
    return fdr

# Same preprocessing as the commented-out t-test cell.
mwu_adata = adata.copy()
sc.pp.normalize_total(mwu_adata)
sc.pp.log1p(mwu_adata)

data1 = mwu_adata[mwu_adata.obs['cg_cov'] == 'T4'].X.toarray()
data2 = mwu_adata[mwu_adata.obs['cg_cov'] == 'cM'].X.toarray()

# Difference of the mean log-normalised expression (T4 minus cM).
logfc = data1.mean(axis=0) - data2.mean(axis=0)

mwu_stat, mwu_pval = stats.mannwhitneyu(data1, data2, axis=0)
mwu_result = pd.DataFrame(
    zip(logfc, mwu_pval, safe_fdr(mwu_pval)),
    index=mwu_adata.var.index,
    columns=columns)
mwu_result.to_csv(data_path + 'T4_vs_cM.sc.mwu.csv')

### Run weighted regression

In [None]:
# Separate copy of the loaded data for the weighted-regression cells below.
reg_adata = adata.copy()

In [None]:
def sample_mean_estimator(data):
    """Return the mean and standard error of row-normalised `data`.

    A row of ones is appended, each row is divided by its own total, and
    the column-wise mean and standard error of the result are returned.
    """
    padded = np.append(data, np.ones((1, data.shape[1])), axis=0)
    row_totals = padded.sum(axis=1).reshape(-1, 1)
    normalised = padded / row_totals

    m = normalised.mean(axis=0)
    v = normalised.var(axis=0)
    n_rows = padded.shape[0]
    se = np.sqrt(v / (n_rows - 1))

    return m, se

In [None]:
def fe_se(cov):
    """Get fixed-effect standard errors from coefficient covariance matrices.

    Parameters
    ----------
    cov : ndarray, shape (p, p) or (p, p, i)
        Covariance of the coefficient estimates. For 3-D input the LAST
        axis indexes parallel datasets; this is the layout `wls` passes in.

    Returns
    -------
    ndarray, shape (p, i)
        Square roots of the diagonal entries (i == 1 for 2-D input).
    """
    if cov.ndim == 2:
        # The new axis must go last to match the 3-D layout. Prepending it
        # made np.diagonal (which uses axes 0 and 1) return the first row
        # of the matrix instead of its diagonal.
        cov = cov[:, :, None]

    return np.sqrt(np.diagonal(cov)).T

def wls(y, v, X, tau2=0):
    """Weighted least squares with known per-observation variances.

    Parameters
    ----------
    y : ndarray, shape (k, i)
        Outcomes for k observations and i parallel datasets.
    v : ndarray, shape (k, i)
        Variances of the outcomes.
    X : ndarray, shape (k, p)
        Design matrix.
    tau2 : float, optional
        Extra variance added to `v` before inversion.

    Returns
    -------
    beta, se, p : ndarrays, each of shape (p, i)
        Coefficients, their standard errors and two-sided normal p-values.
    """
    w = 1.0 / (v + tau2)

    # Einsum indices: k = studies, p = predictors, i = parallel iterates
    wX = np.einsum("kp,ki->ipk", X, w)
    # X' W X per dataset, shape (i, p, p).
    cov = wX.dot(X)

    # numpy >= 1.8 inverts stacked matrices along the first N - 2 dims, so we
    # can vectorize computation along the second dimension (parallel datasets)
    # The pseudo-inverse of X' W X is the coefficient covariance; after .T
    # its shape is (p, p, i).
    precision = np.linalg.pinv(cov).T

    pwX = np.einsum("ipk,qpi->iqk", wX, precision)
    beta = np.einsum("ipk,ik->ip", pwX, y.T).T

    se = fe_se(precision)
    z = beta / se
    # Two-sided p-value via the survival function: unlike
    # 1 - |0.5 - cdf(z)| * 2, it does not round to exactly 0 for large |z|.
    p = 2 * stats.norm.sf(np.abs(z))

    return beta, se, p

In [None]:
# One row per (individual, cell type) pair: estimated means, their
# variances (squared standard errors) and the matching metadata.
means = []
variances = []
meta = []
for ind in inds:
    for ct in ['cM', 'T4']:
        
        # Dense count matrix for this individual / cell-type subset.
        data = reg_adata[(reg_adata.obs['ind']==ind) & (reg_adata.obs['cg_cov']==ct)].X.toarray()
        m, se = sample_mean_estimator(data)
        
        means.append(m)
        variances.append(np.power(se,2))
        # ct is encoded as 1 for 'T4' and 0 for 'cM'.
        meta.append((ind, int(ct=='T4')))
means = np.vstack(means)
variances = np.vstack(variances)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

In [None]:
design = dmatrix('ct*ind', meta)
# Find the main-effect `ct` column by name rather than hard-coding its
# position: the position depends on how patsy orders the terms and on how
# many levels `ind` has.
ct_idx = design.design_info.column_names.index('ct')

In [None]:
# Fit the weighted regression one column of `means` at a time and keep
# the coefficient and p-value found at row `ct_idx`.
beta_list = []
pv_list = []
for i in range(means.shape[1]):
    
    # [:, [i]] keeps a trailing axis so that wls sees (k, 1) arrays.
    b, se, p = wls(means[:,[i]], variances[:,[i]], design)
    beta_list.append(b[ct_idx][0])
    pv_list.append(p[ct_idx][0])

# fdrcorrection(...)[1] is the array of adjusted p-values.
wls_result = pd.DataFrame(zip(beta_list, pv_list,  fdrcorrection(pv_list)[1]  ), columns=columns, index=reg_adata.var.index)
wls_result.to_csv(data_path + 'T4_vs_cM.sc.wls.csv')

### sum GLM approach with borrowed dispersion parameters - no weights 

In [None]:
# Fresh copy of the data for the sum-GLM cells.
glm_adata = adata.copy()
# Dispersion table; its 'gene' and 'dispersion' columns are read below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

In [None]:
def sample_sum(data):
    """Sum `data` over axis 0 and return the per-column totals."""
    column_totals = data.sum(axis=0)
    return column_totals

In [None]:
# Pseudobulk sums: one row per (individual, cell type) pair.
sums = []
meta = []
for ind in inds:
    for ct in ['cM', 'T4']:
        
        # Dense count matrix for this individual / cell-type subset.
        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        s = sample_sum(data)
        sums.append(s)
        # ct is encoded as 1 for 'T4' and 0 for 'cM'.
        meta.append((ind, int(ct=='T4')))
sums = pd.DataFrame(np.vstack(sums), columns=glm_adata.var.index)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
sums = sums[gene_list]

In [None]:
# Additive design built from the `ct` and `ind` columns of `meta`.
design = dmatrix('ct+ind', meta)

In [None]:
# Per-row total over the retained genes; passed to the GLM as `exposure`.
exposure = sums.sum(axis=1).values

In [None]:
%%time
# Per gene: negative-binomial GLM with the gene's borrowed dispersion,
# compared by likelihood-ratio test with the same model fitted on the
# design without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
sum_glm_results = []
for idx in range(len(gene_list)):
    model = sm.GLM(sums.iloc[:, [idx]], design , exposure=exposure,family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    res_model = sm.GLM(sums.iloc[:, [idx]], design[:, :-1] , exposure=exposure,family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    fit = model.fit()
    res_fit = res_model.fit()
    # LRT statistic -2*(llf_restricted - llf_full); df is the difference in residual df.
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    sum_glm_results.append((fit.params[-1], pv))
sum_glm_results = pd.DataFrame(sum_glm_results, columns=['logFC', 'PValue'], index=gene_list)
# The second return value of fdrcorrection holds the adjusted p-values.
_, sum_glm_results['FDR'] = fdrcorrection(sum_glm_results['PValue'])

In [None]:
# Persist the sum-GLM results next to the input data.
sum_glm_results.to_csv(data_path + 'T4_vs_cM.sc.sum_glm.csv')

### scaled mean GLM approach with borrowed dispersion parameters - no weights 

In [None]:
# Fresh copy of the data for the scaled-mean GLM cells.
glm_adata = adata.copy()
# Dispersion table; its 'gene' and 'dispersion' columns are read below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

In [None]:
def sample_sum(data):
    """Collapse axis 0 of `data` by summation."""
    return data.sum(axis=0)

def scaled_mean(data):
    """Return the mean of row-normalised `data`, scaled by the grand total.

    Each row is divided by its own sum, the rows are averaged, and the
    result is multiplied by the sum of all entries of `data`.
    """
    row_totals = data.sum(axis=1).reshape(-1, 1)
    proportions = data / row_totals
    mean_proportion = proportions.mean(axis=0)
    return mean_proportion * data.sum()

In [None]:
# Scaled-mean pseudobulk: one row per (individual, cell type) pair.
scaled_means = []
meta = []
totals = []
for ind in inds:
    for ct in ['cM', 'T4']:
        
        # Dense count matrix for this individual / cell-type subset.
        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        totals.append(data.sum())
        s = scaled_mean(data)
        scaled_means.append(s)
        # ct is encoded as 1 for 'T4' and 0 for 'cM'.
        meta.append((ind, int(ct=='T4')))
scaled_means = pd.DataFrame(np.vstack(scaled_means), columns=glm_adata.var.index)
totals = np.array(totals)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
scaled_means = scaled_means[gene_list]

In [None]:
# Additive design built from the `ct` and `ind` columns of `meta`.
design = dmatrix('ct+ind', meta)

In [None]:
# Replaces the raw-count totals collected in the loop with totals over the retained genes.
totals = scaled_means.sum(axis=1).values

In [None]:
%%time
# Per gene: negative-binomial GLM on the scaled means with the gene's
# borrowed dispersion, tested by likelihood ratio against the design
# without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
scaled_mean_glm_results = []
for idx in range(len(gene_list)):
    model = sm.GLM(scaled_means.iloc[:, [idx]], design , exposure=totals,family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    res_model = sm.GLM(scaled_means.iloc[:, [idx]], design[:, :-1] , exposure=totals,family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    fit = model.fit()
    res_fit = res_model.fit()
    # LRT statistic -2*(llf_restricted - llf_full); df is the difference in residual df.
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    scaled_mean_glm_results.append((fit.params[-1], pv))
scaled_mean_glm_results = pd.DataFrame(scaled_mean_glm_results, columns=['logFC', 'PValue'], index=gene_list)
# The second return value of fdrcorrection holds the adjusted p-values.
_, scaled_mean_glm_results['FDR'] = fdrcorrection(scaled_mean_glm_results['PValue'])

In [None]:
# Persist the scaled-mean GLM results next to the input data.
scaled_mean_glm_results.to_csv(data_path + 'T4_vs_cM.sc.scaled_mean_glm.csv')

### scaled iv mean GLM approach with borrowed dispersion parameters

In [None]:
# Fresh copy of the data for the scaled inverse-variance mean GLM cells.
glm_adata = adata.copy()
# Dispersion table; its 'gene' and 'dispersion' columns are read below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

In [None]:
def scaled_iv_mean(data, q=0.07):
    """Return the inverse-variance weighted mean of row-normalised `data`,
    scaled by the grand total of `data`.

    Unlike `scaled_mean_se2`, no row of ones is appended here.

    Parameters
    ----------
    data : 2-D ndarray
        Rows are averaged over; presumably cells x genes (confirm with caller).
    q : float, optional
        Constant used in the variance correction terms. Defaults to 0.07,
        the value that was previously hard-coded.

    Returns
    -------
    1-D ndarray
        Weighted mean per column multiplied by ``data.sum()``. Columns whose
        weighted mean is not finite use the unweighted mean instead.
    """
    sf = data.sum(axis=1)
    X = data / sf.reshape(-1, 1)
    naive_v = X.var(axis=0)
    naive_m = X.mean(axis=0)
    # Subtract the q-dependent correction from the naive variance.
    v = naive_v - (1 - q) * (X / (sf**2 - sf * (1 - q)).reshape(-1, 1)).mean(axis=0)
    variance_contributions = ((1 - q) / sf).reshape(-1, 1) * naive_m.reshape(1, -1) + v.reshape(1, -1)
    m = np.average(X, weights=1 / variance_contributions, axis=0)
    not_finite = ~np.isfinite(m)
    m[not_finite] = naive_m[not_finite]

    return m * data.sum()

In [None]:
# Inverse-variance scaled means: one row per (individual, cell type) pair.
scaled_iv_means = []
meta = []
totals = []
for ind in inds:
    for ct in ['cM', 'T4']:
        
        # Dense count matrix for this individual / cell-type subset.
        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        totals.append(data.sum())
        s = scaled_iv_mean(data)
        scaled_iv_means.append(s)
        # ct is encoded as 1 for 'T4' and 0 for 'cM'.
        meta.append((ind, int(ct=='T4')))
scaled_iv_means = pd.DataFrame(np.vstack(scaled_iv_means), columns=glm_adata.var.index)
totals = np.array(totals)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
scaled_iv_means = scaled_iv_means[gene_list]

In [None]:
# Additive design built from the `ct` and `ind` columns of `meta`.
design = dmatrix('ct+ind', meta)

In [None]:
# Replaces the raw-count totals collected in the loop with totals over the retained genes.
totals = scaled_iv_means.sum(axis=1).values

In [None]:
%%time
# Per gene: negative-binomial GLM on the inverse-variance scaled means,
# tested by likelihood ratio against the design without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
scaled_iv_mean_glm_results = []
for idx in range(len(gene_list)):
    model = sm.GLM(
        scaled_iv_means.iloc[:, [idx]].values,
        design,
        exposure=totals,
        family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    res_model = sm.GLM(
        scaled_iv_means.iloc[:, [idx]].values,
        design[:, :-1],
        exposure=totals,
        family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    fit = model.fit()
    res_fit = res_model.fit()
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    # Record the likelihood-ratio p-value, as every other GLM/WLS loop in
    # this notebook does. The Wald p-value (fit.pvalues[-1]) was stored
    # before, which left `pv` and the restricted fit unused.
    scaled_iv_mean_glm_results.append((fit.params[-1], pv))
scaled_iv_mean_glm_results = pd.DataFrame(scaled_iv_mean_glm_results, columns=['logFC', 'PValue'], index=gene_list)
# The second return value of fdrcorrection holds the adjusted p-values.
_, scaled_iv_mean_glm_results['FDR'] = fdrcorrection(scaled_iv_mean_glm_results['PValue'])

In [None]:
# Persist the scaled inverse-variance mean GLM results next to the input data.
scaled_iv_mean_glm_results.to_csv(data_path + 'T4_vs_cM.sc.scaled_iv_mean_glm.csv')

### scaled mean GLM approach with borrowed dispersion parameters - weights 

In [None]:
# Fresh copy of the data for the weighted scaled-mean GLM cell.
glm_adata = adata.copy()
# Dispersion table; its 'gene' and 'dispersion' columns are read below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

def sample_sum(data):
    """Return per-column totals of `data` (sum over axis 0)."""
    totals_per_column = data.sum(axis=0)
    return totals_per_column

def scaled_mean_se2(data, q=0.07):
    """Estimate count-scale means, variances and squared standard errors.

    A row of ones is appended to `data`, every row is divided by its own
    total, and the normalised rows are combined with inverse-variance
    weights (falling back to the plain mean where the weighted mean is not
    finite). All outputs are rescaled by the grand total of `data`.

    Parameters
    ----------
    data : 2-D ndarray
        Rows are averaged over; presumably cells x genes (confirm with caller).
    q : float, optional
        Constant used in the variance correction terms. Defaults to 0.07,
        the value that was previously hard-coded.

    Returns
    -------
    scaled_m : 1-D ndarray
        Weighted mean times ``data.sum()``.
    scaled_v : 1-D ndarray
        Corrected variance times ``data.sum()**2``.
    scaled_se2 : 1-D ndarray
        ``scaled_v`` divided by ``data.shape[0]``. The correction is
        subtracted from the naive variance, so the variance outputs can be
        zero or negative; callers must guard before inverting them.
    """
    augmented_data = np.append(data, np.ones((1, data.shape[1])), axis=0)

    sf = augmented_data.sum(axis=1)
    X = augmented_data / sf.reshape(-1, 1)

    naive_v = X.var(axis=0)
    naive_m = X.mean(axis=0)
    # Subtract the q-dependent correction from the naive variance.
    v = naive_v - (1 - q) * (X / (sf**2 - sf * (1 - q)).reshape(-1, 1)).mean(axis=0)
    variance_contributions = ((1 - q) / sf).reshape(-1, 1) * naive_m.reshape(1, -1) + v.reshape(1, -1)
    m = np.average(X, weights=1 / variance_contributions, axis=0)
    not_finite = ~np.isfinite(m)
    m[not_finite] = naive_m[not_finite]

    return m*data.sum(), (v)*(data.sum()**2), (v/data.shape[0])*(data.sum()**2)

# Build one pseudobulk row per (individual, cell type) pair.
scaled_means = []
weights = []
meta = []
totals = []
for ind in inds:
    for ct in ['cM', 'T4']:

        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        totals.append(data.sum())
        s, v, se2 = scaled_mean_se2(data)
        scaled_means.append(s)
        # se2 can be zero or negative (a correction term is subtracted from
        # the variance), so 1/se2 is only taken where se2 > 0; every other
        # gene keeps the neutral weight 1. A single nan/inf here would also
        # poison the global normalisation by weights.values.mean() below.
        w = np.ones(s.shape[0])
        w[se2>0] = 1/se2[se2>0]
        weights.append(np.sqrt(w))
        # weights.append(1/se2)

        meta.append((ind, int(ct=='T4')))
scaled_means = pd.DataFrame(np.vstack(scaled_means), columns=glm_adata.var.index)
weights = pd.DataFrame(np.vstack(weights), columns=glm_adata.var.index)
# totals = np.array(totals)
# NOTE(review): unlike the other sections, these totals are taken BEFORE
# filtering to gene_list - confirm that this is intended.
totals = (scaled_means).sum(axis=1).values
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
scaled_means = scaled_means[gene_list]
weights = weights[gene_list]

# weights = weights*10
# weights = weights / weights.sum(axis=0) * weights.shape[0]
# weights[weights.columns] = np.ones(weights.shape)*10
# weights = weights /  weights.mean(axis=0)
# Normalise so that the mean over all (sample, gene) weights is 1.
weights = weights / weights.values.mean()
design = dmatrix('ct+ind', meta)


# Per gene: variance-weighted negative-binomial GLM, tested by likelihood
# ratio against the design without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
weighted_mean_glm_results = []
for idx in range(len(gene_list)):
    model = sm.GLM(
        scaled_means.iloc[:, [idx]],
        design,
        exposure=totals,
        var_weights=weights.iloc[:, idx],
        family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    res_model = sm.GLM(
        scaled_means.iloc[:, [idx]], design[:, :-1],
        exposure=totals,
        var_weights=weights.iloc[:, idx],
        family=sm.families.NegativeBinomial(alpha=dispersions[idx]))
    fit = model.fit()
    res_fit = res_model.fit()
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    weighted_mean_glm_results.append((fit.params[-1], pv))
weighted_mean_glm_results = pd.DataFrame(weighted_mean_glm_results, columns=['logFC', 'PValue'], index=gene_list)
_, weighted_mean_glm_results['FDR'] = fdrcorrection(weighted_mean_glm_results['PValue'])

weighted_mean_glm_results.to_csv(data_path + 'T4_vs_cM.sc.weighted_mean_glm.csv')

### scaled mean WLS approach - weights (the dispersion table is loaded only for its gene list; the WLS fit does not use the dispersions)

In [493]:
# Fresh copy of the data for the weighted scaled-mean WLS cell.
glm_adata = adata.copy()
# Dispersion table; only its 'gene' column is needed by the WLS fit below.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

def sample_sum(data):
    """Add up `data` along axis 0, giving one value per column."""
    return data.sum(axis=0)

def scaled_mean_se2(data, q=0.07):
    """Estimate per-column scaled means and their squared standard errors.

    A row of ones is appended to `data`, every row is divided by its own
    total, and the normalised rows are combined with inverse-variance
    weights. Columns whose weighted mean is not finite fall back to the
    plain (unweighted) mean.

    Parameters
    ----------
    data : 2-D ndarray
        Rows are averaged over; presumably cells x genes (confirm with caller).
    q : float, optional
        Constant used in the variance correction terms. Defaults to 0.07,
        the value that was previously hard-coded.

    Returns
    -------
    m : 1-D ndarray
        Weighted mean of the normalised rows, one entry per column.
    se2 : 1-D ndarray
        Corrected variance divided by ``data.shape[0]``. Because a
        correction term is subtracted from the naive variance, entries can
        be zero or negative; callers must guard before inverting them.
    """
    augmented_data = np.append(data, np.ones((1, data.shape[1])), axis=0)

    sf = augmented_data.sum(axis=1)
    X = augmented_data / sf.reshape(-1, 1)

    naive_v = X.var(axis=0)
    naive_m = X.mean(axis=0)
    # Subtract the q-dependent correction from the naive variance.
    v = naive_v - (1 - q) * (X / (sf**2 - sf * (1 - q)).reshape(-1, 1)).mean(axis=0)
    # One variance term per (row, column), used as inverse weights below.
    variance_contributions = ((1 - q) / sf).reshape(-1, 1) * naive_m.reshape(1, -1) + v.reshape(1, -1)
    m = np.average(X, weights=1 / variance_contributions, axis=0)
    not_finite = ~np.isfinite(m)
    m[not_finite] = naive_m[not_finite]

    return m, (v / data.shape[0])

# Build one pseudobulk row per (individual, cell type) pair.
scaled_means = []
weights = []
meta = []
totals = []
for ind in inds:
    for ct in ['cM', 'T4']:

        data = glm_adata[(glm_adata.obs['ind']==ind) & (glm_adata.obs['cg_cov']==ct)].X.toarray()
        totals.append(data.sum())
        s, se2 = scaled_mean_se2(data)
        scaled_means.append(s)
        # se2 can be zero or negative (a correction term is subtracted from
        # the variance), so 1/se2 is only taken where se2 > 0; every other
        # gene keeps the neutral weight 1 instead of becoming nan/inf.
        w = np.ones(s.shape[0])
        w[se2>0] = 1/se2[se2>0]
        # NOTE(review): statsmodels WLS documents `weights` as proportional
        # to 1/variance; the sqrt used here is kept from the original.
        weights.append(np.sqrt(w))
        meta.append((ind, int(ct=='T4')))
scaled_means = pd.DataFrame(np.vstack(scaled_means), columns=glm_adata.var.index)
weights = pd.DataFrame(np.vstack(weights), columns=glm_adata.var.index)
totals = np.array(totals)
meta = pd.DataFrame(meta, columns=['ind', 'ct'])

# Filter and re-order by gene_list
scaled_means = scaled_means[gene_list]
weights = weights[gene_list]

# weights = weights / weights.mean(axis=0)

design = dmatrix('ct+ind', meta)
totals = scaled_means.sum(axis=1).values

# Per gene: likelihood-ratio test of the full design against the design
# without its last column.
# NOTE(review): this assumes the last design column is `ct` - confirm.
weighted_mean_glm_results = []
for idx in range(len(gene_list)):
    model = sm.WLS(
        np.log(scaled_means.iloc[:, [idx]]),
        design,
        weights=weights.iloc[:, idx])
    res_model = sm.WLS(
        np.log(scaled_means.iloc[:, [idx]]),
        design[:, :-1],
        weights=weights.iloc[:, idx])
    fit = model.fit()
    res_fit = res_model.fit()
    pv = stats.chi2.sf(-2*(res_fit.llf - fit.llf), df=res_fit.df_resid-fit.df_resid)
    weighted_mean_glm_results.append((fit.params[-1], pv))
weighted_mean_glm_results = pd.DataFrame(weighted_mean_glm_results, columns=['logFC', 'PValue'], index=gene_list)
_, weighted_mean_glm_results['FDR'] = fdrcorrection(weighted_mean_glm_results['PValue'])

weighted_mean_glm_results.to_csv(data_path + 'T4_vs_cM.sc.weighted_mean_wls.csv')

### Current implementation of memento

In [313]:
# Fresh copy of the data for the memento run.
glm_adata = adata.copy()
# Dispersion table; read here as in the other sections.
dispersions = pd.read_csv(data_path + 'T4_vs_cM.dispersions.csv', index_col=0)
gene_list = dispersions['gene'].tolist()
# NOTE: `dispersions` is rebound from the DataFrame to a list in gene_list order.
dispersions = dispersions['dispersion'].tolist()

In [349]:
# Constant 'q' column consumed by setup_memento via q_column; the same
# 0.07 appears in the scaled-mean helpers above.
glm_adata.obs['q'] = 0.07
memento.setup_memento(glm_adata, q_column='q', filter_mean_thresh=0.001,trim_percent=0.05, shrinkage=0)
# de_sim_adata.obs['memento_size_factor'] = de_sim_adata.X.sum(axis=1).A1
# Group cells by individual and cell type, then compute per-group moments.
memento.create_groups(glm_adata, label_columns=['ind', 'cg_cov'])
memento.compute_1d_moments(glm_adata, filter_genes=True)

# Group metadata, dummy-encoded with the first level of each column dropped.
meta_df = memento.get_groups(glm_adata)
meta_df['ind'] = meta_df['ind'].astype(str)
meta_df = pd.get_dummies(meta_df, prefix='', prefix_sep='', drop_first=True)

# Treatment indicator: 1 where the 'cM' dummy is 0.
treatment = 1-meta_df[['cM']]
# First three dummy columns plus their products with the treatment.
# NOTE(review): presumably these three columns are the individual dummies - confirm.
covariate = pd.concat([meta_df.iloc[:, :3], meta_df.iloc[:, :3]*treatment.values], axis=1)

memento.ht_1d_moments(
    glm_adata, 
    treatment=treatment,
    covariate=covariate,
    num_boot=5000, 
    verbose=1,
    num_cpus=14,
    approx=True)

[Parallel(n_jobs=14)]: Using backend LokyBackend with 14 concurrent workers.
[Parallel(n_jobs=14)]: Done  22 tasks      | elapsed:    2.6s
[Parallel(n_jobs=14)]: Done 172 tasks      | elapsed:    3.9s
[Parallel(n_jobs=14)]: Done 422 tasks      | elapsed:    6.2s
[Parallel(n_jobs=14)]: Done 772 tasks      | elapsed:    9.1s
[Parallel(n_jobs=14)]: Done 1222 tasks      | elapsed:   12.8s
[Parallel(n_jobs=14)]: Done 1772 tasks      | elapsed:   17.5s
[Parallel(n_jobs=14)]: Done 2422 tasks      | elapsed:   23.2s
[Parallel(n_jobs=14)]: Done 3172 tasks      | elapsed:   29.6s
[Parallel(n_jobs=14)]: Done 4022 tasks      | elapsed:   37.0s
[Parallel(n_jobs=14)]: Done 4972 tasks      | elapsed:   45.0s
[Parallel(n_jobs=14)]: Done 5603 out of 5603 | elapsed:   50.4s finished


In [350]:
# Collect the hypothesis-test results computed by ht_1d_moments.
memento_result = memento.get_1d_ht_result(glm_adata)

In [351]:
# Index the result table by gene name.
memento_result.index = memento_result['gene']

In [352]:
# Multiple-testing correction of the DE p-values using memento's own helper.
memento_result['de_fdr'] = memento.util._fdrcorrect(memento_result['de_pval'])

In [353]:
# Persist the coefficient, p-value and FDR columns next to the input data.
memento_result[['de_coef', 'de_pval', 'de_fdr']].to_csv(data_path + 'T4_vs_cM.sc.memento.csv')