# Run memento for inference comparison

Power analysis for DM, DV, and DC

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import scipy as sp
import itertools
import numpy as np
import scipy.stats as stats
from scipy.integrate import dblquad
import seaborn as sns
from statsmodels.stats.multitest import fdrcorrection
import imp
# Show up to 999 rows when a DataFrame is displayed.
pd.options.display.max_rows = 999
# Never truncate cell contents. `None` is the supported "no limit" value; the
# legacy `-1` sentinel triggers a FutureWarning on pandas 1.x and is rejected
# by newer pandas releases.
pd.set_option('display.max_colwidth', None)
import pickle as pkl
import time
import string
from sklearn.datasets import make_spd_matrix


  pd.set_option('display.max_colwidth', -1)


In [2]:
import matplotlib as mpl
import matplotlib.pylab as pylab

# Use font type 42 (TrueType) for both the PDF and PostScript backends.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Relative font sizes: legend, axis labels, and titles are "medium";
# tick labels are "small".
params = {
    **{key: 'medium' for key in ('legend.fontsize',
                                 'axes.labelsize',
                                 'axes.titlesize',
                                 'figure.titlesize')},
    **{key: 'small' for key in ('xtick.labelsize', 'ytick.labelsize')},
}
pylab.rcParams.update(params)


In [3]:
import sys

# Location of the locally built memento egg.
# NOTE(review): this is a hard-coded, machine-specific absolute path; the
# notebook will not import memento on another machine unless it is adjusted.
memento_egg_path = '/home/ssm-user/Github/scrna-parameter-estimation/dist/memento-0.0.9-py3.8.egg'

# Only append once so that re-running this cell does not keep growing sys.path.
if memento_egg_path not in sys.path:
    sys.path.append(memento_egg_path)

import memento
import memento.simulate as simulate

In [4]:
# Directory holding the simulated inference datasets and the result files
# written below. The trailing slash is required: later cells build file paths
# by plain string concatenation (`data_path + 'name'`).
data_path = '/data_volume/memento/simulation/inference/'

### Read the simulated datasets

In [5]:
# Load the simulated 'dc.h5ad' dataset from `data_path`.
# NOTE(review): `dc_sim_adata` is not used by the DE/DV cells shown in this
# part of the notebook.
dc_sim_adata = sc.read(data_path + 'dc.h5ad')

### Run memento for DE

In [6]:
# Load the simulated DE dataset and give every cell the same `q` value.
# NOTE(review): `q` is presumably memento's capture-rate parameter - confirm
# against the memento documentation.
de_sim_adata = sc.read(data_path + 'de.h5ad')
de_sim_adata.obs['q'] = 0.07

# Configure memento, group cells by the `ct_real` label, and compute the
# per-group 1D moments (with gene filtering enabled).
memento.setup_memento(
    de_sim_adata,
    q_column='q',
    filter_mean_thresh=0.07,
    trim_percent=1,
    shrinkage=0,
)
memento.create_groups(de_sim_adata, label_columns=['ct_real'])
memento.compute_1d_moments(de_sim_adata, filter_genes=True)

# One-hot encode the group metadata; with an empty prefix the indicator
# columns are named directly after the label values.
meta_df = pd.get_dummies(
    memento.get_groups(de_sim_adata),
    prefix='',
    prefix_sep='',
    drop_first=False,
)

# Design matrices: the 'A' indicator is the treatment, and the only covariate
# is a constant intercept column of ones (one row per group).
treatment = meta_df[['A']]
covariate = pd.DataFrame({'intercept': np.ones(len(treatment))})

# Bootstrap hypothesis test on the 1D moments; results are stored on the
# AnnData object and retrieved later with `memento.get_1d_ht_result`.
memento.ht_1d_moments(
    de_sim_adata,
    treatment=treatment,
    covariate=covariate,
    resampling='bootstrap',
    num_boot=5000,
    approx=False,
    num_cpus=13,
    verbose=1,
)

  df_sub[k].cat.remove_unused_categories(inplace=True)
[Parallel(n_jobs=13)]: Using backend LokyBackend with 13 concurrent workers.
[Parallel(n_jobs=13)]: Done  24 tasks      | elapsed:    4.2s
[Parallel(n_jobs=13)]: Done 174 tasks      | elapsed:    7.8s
[Parallel(n_jobs=13)]: Done 424 tasks      | elapsed:   10.9s
[Parallel(n_jobs=13)]: Done 774 tasks      | elapsed:   15.3s
[Parallel(n_jobs=13)]: Done 1224 tasks      | elapsed:   21.4s
[Parallel(n_jobs=13)]: Done 1773 out of 1773 | elapsed:   28.9s finished


In [7]:
# Collect the 1D hypothesis-test results for the DE simulation into a DataFrame.
memento_de_result = memento.get_1d_ht_result(de_sim_adata)
# Cast the gene identifiers to integers so they can be compared numerically
# (the next cell filters with `gene < 500`).
memento_de_result['gene'] = memento_de_result['gene'].astype(int)

In [8]:
# Fraction of genes with index below 500 whose DE p-value is under 0.05.
# NOTE(review): genes 0-499 are presumably the simulated true positives,
# making this the empirical power - confirm against the simulation setup.
memento_de_result.loc[memento_de_result['gene'] < 500, 'de_pval'].lt(0.05).mean()

1.0

In [9]:
# Persist the DE test results as CSV (row index omitted) and write the
# memento-processed AnnData object next to it under `data_path`.
memento_de_result.to_csv(data_path + 'memento_de.csv', index=False)
de_sim_adata.write(data_path + 'de_filtered.h5ad')

... storing 'memento_group' as categorical


### Run memento for DV

In [10]:
# Load the simulated DV dataset and give every cell the same `q` value.
# NOTE(review): `q` is presumably memento's capture-rate parameter - confirm
# against the memento documentation.
dv_sim_adata = sc.read(data_path + 'dv.h5ad')
dv_sim_adata.obs['q'] = 0.07

# Same memento preparation as the DE run: configure, group cells by
# `ct_real`, then compute per-group 1D moments with gene filtering enabled.
memento.setup_memento(
    dv_sim_adata,
    q_column='q',
    filter_mean_thresh=0.07,
    trim_percent=1,
    shrinkage=0,
)
memento.create_groups(dv_sim_adata, label_columns=['ct_real'])
memento.compute_1d_moments(dv_sim_adata, filter_genes=True)

# One-hot encode the group metadata; with an empty prefix the indicator
# columns are named directly after the label values.
meta_df = pd.get_dummies(
    memento.get_groups(dv_sim_adata),
    prefix='',
    prefix_sep='',
    drop_first=False,
)

# Design matrices: the 'A' indicator is the treatment, and the only covariate
# is a constant intercept column of ones (one row per group).
treatment = meta_df[['A']]
covariate = pd.DataFrame({'intercept': np.ones(len(treatment))})

# Bootstrap hypothesis test on the 1D moments; results are stored on the
# AnnData object and retrieved later with `memento.get_1d_ht_result`.
memento.ht_1d_moments(
    dv_sim_adata,
    treatment=treatment,
    covariate=covariate,
    resampling='bootstrap',
    num_boot=5000,
    approx=False,
    num_cpus=13,
    verbose=1,
)

  df_sub[k].cat.remove_unused_categories(inplace=True)
[Parallel(n_jobs=13)]: Using backend LokyBackend with 13 concurrent workers.
[Parallel(n_jobs=13)]: Done  24 tasks      | elapsed:    0.6s
[Parallel(n_jobs=13)]: Done 322 tasks      | elapsed:    4.2s
[Parallel(n_jobs=13)]: Done 822 tasks      | elapsed:    9.5s
[Parallel(n_jobs=13)]: Done 1522 tasks      | elapsed:   17.4s
[Parallel(n_jobs=13)]: Done 1768 out of 1768 | elapsed:   20.2s finished


In [11]:
# Collect the 1D hypothesis-test results for the DV simulation into a DataFrame.
memento_dv_result = memento.get_1d_ht_result(dv_sim_adata)
# Cast the gene identifiers to integers so they can be compared numerically
# (the next cell filters with `gene < 500`).
memento_dv_result['gene'] = memento_dv_result['gene'].astype(int)

In [12]:
# Fraction of genes with index below 500 whose DV p-value is under 0.05.
# NOTE(review): genes 0-499 are presumably the simulated true positives,
# making this the empirical power - confirm against the simulation setup.
memento_dv_result.loc[lambda df: df['gene'] < 500, 'dv_pval'].lt(0.05).mean()

0.776595744680851

In [13]:
# Persist the DV test results as CSV (row index omitted) and write the
# memento-processed AnnData object next to it under `data_path`.
memento_dv_result.to_csv(data_path + 'memento_dv.csv', index=False)
dv_sim_adata.write(data_path + 'dv_filtered.h5ad')

... storing 'memento_group' as categorical
