In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.display import display
import sys
import os
import logging

# Put ../src at the front of the module search path so the project modules
# imported in later cells (io_utils, main_methods, sim_utils, general_utils)
# resolve from there.
sys.path.insert(0,'../src')
import matplotlib as mpl
# Default resolution for every figure rendered in this notebook.
mpl.rcParams['figure.dpi']= 100

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.__version__

'0.23.4'

To run the other methods, see:
- scripts/run_spatialde.py
- scripts/run_scgco.py
- scripts/launch_scgco.sh

## Load Information for GLISS

In [4]:
from io_utils import save_data_to_file, load_data_from_file
from main_methods import select_spatial_genes
from sim_utils import complex_eval_spatial_sim, read_trial_data
from general_utils import norm_mtx

# Root directory of the simulation study. The historical default is a
# machine-specific absolute path, so allow it to be overridden through the
# GLISS_SIM_DIR environment variable instead of editing the notebook.
sim_dir = os.environ.get(
    'GLISS_SIM_DIR',
    '/Users/jjzhu/Google Drive/_GLISS/data/space_comp_sim/20191104')
save = True    # write per-trial results and the evaluation table to disk

# hyperparameters (forwarded unchanged to select_spatial_genes)
alpha = 0.05
knn = 8
n_perm = 10000
cache = True   # reuse an existing per-trial result file instead of recomputing

# file paths
sim_fn = os.path.join(sim_dir, 'sim_setup.csv')
sim_df = load_data_from_file(sim_fn, 'csv')
display(sim_df.head())

# per-trial results of our method are stored under <sim_dir>/result_ours
meth_dir = os.path.join(sim_dir, 'result_ours')
os.makedirs(meth_dir, exist_ok=True)

# evaluation table for our method: one row is appended per simulation trial
eval_fn = os.path.join(sim_dir, 'eval_ours.csv')
eval_df = pd.DataFrame()

### Run GLISS for each simulation

In [5]:
# Run GLISS on every simulated trial (re-using cached per-trial results when
# available) and score the rejections of each trial.
#
# Start from an empty table here rather than relying on the one created in the
# configuration cell: otherwise re-running this cell appends a second copy of
# every row to the existing `eval_df`.
eval_df = pd.DataFrame()
for i, entry in sim_df.iterrows():
    locs, data_norm = read_trial_data(sim_dir, entry)
    data_norm = norm_mtx(data_norm)
    assert locs.shape[0] == data_norm.shape[0], 'Mismatch samples'

    fn = os.path.join(meth_dir, 'result_{}.csv'.format(i))
    if cache and os.path.exists(fn):
        result = load_data_from_file(fn, 'csv')
        # Recover the rejections as integer positions, which is how the
        # non-cached branch below uses `rej_idx` (it indexes a plain array
        # with it). Taking the frame's index labels instead would depend on
        # how the CSV index was written and re-loaded.
        rej_idx = np.flatnonzero(result['reject'].values)
    else:
        pvals, rej_idx = select_spatial_genes(locs,
                                              data_norm.values,
                                              knn=knn,
                                              alpha=alpha,
                                              n_perm=n_perm)
        # boolean mask over all tested columns, True at the rejected positions
        reject = np.zeros(len(pvals), dtype=bool)
        reject[rej_idx] = True
        result = pd.DataFrame({'pvalue': pvals, 'reject': reject},
                              index=data_norm.columns)
        # save result to file
        if save:
            save_data_to_file(result, fn, 'csv')
    new_entry = complex_eval_spatial_sim(entry, rej_idx)
    # NOTE(review): DataFrame.append is kept because the return type of
    # complex_eval_spatial_sim is not visible here; it works on the pandas
    # version this notebook ran on but was removed in pandas 2.0.
    eval_df = eval_df.append(new_entry)
display(eval_df)
if save:
    save_data_to_file(eval_df, eval_fn, 'csv')

Unnamed: 0,seed,temp,n_per_reg,n_regs,locs_fn,expr_fn
0,0,linear,25,4,locs_2d.csv,sim_data/mtx_linear_0.csv
1,0,diaquad,25,4,locs_2d.csv,sim_data/mtx_diaquad_0.csv
2,0,radial,25,4,locs_2d.csv,sim_data/mtx_radial_0.csv
3,0,cosine,25,4,locs_2d.csv,sim_data/mtx_cosine_0.csv
4,1,linear,25,4,locs_2d.csv,sim_data/mtx_linear_1.csv


Unnamed: 0,FDP,Power,Power-Reg0,Power-Reg1,Power-Reg2,Power-Reg3,seed,temp
0,0.037500,0.77,1.0,1.0,1.0,0.08,0.0,linear
1,0.012346,0.80,1.0,1.0,1.0,0.20,0.0,diaquad
2,0.048780,0.78,1.0,1.0,1.0,0.12,0.0,radial
3,0.050633,0.75,1.0,1.0,1.0,0.00,0.0,cosine
4,0.012987,0.76,1.0,1.0,1.0,0.04,1.0,linear
5,0.058140,0.81,1.0,1.0,1.0,0.24,1.0,diaquad
6,0.012658,0.78,1.0,1.0,1.0,0.12,1.0,radial
7,0.037500,0.77,1.0,1.0,1.0,0.08,1.0,cosine
8,0.024096,0.81,1.0,1.0,1.0,0.24,2.0,linear
9,0.036585,0.79,1.0,1.0,1.0,0.16,2.0,diaquad


## Re-evaluate the rejections from all methods

In [9]:
def recompute_evaluation(method, alpha=0.05):
    """Re-score the saved per-trial results of an external method.

    For every row of the notebook-level ``sim_df`` this loads
    ``<sim_dir>/result_<method>/result_<i>.csv``, selects the rows whose
    adjusted significance column is below ``alpha``, and passes their indices
    to ``complex_eval_spatial_sim``. The collected table is written to
    ``<sim_dir>/eval_<method>.csv`` and a preview of it is displayed.

    Parameters
    ----------
    method : str
        ``'scgco'`` (thresholds the ``fdr`` column and takes the result's
        index) or ``'spatialde'`` (thresholds the ``qval`` column and takes
        the ``g`` column). Both are cast to ``int``.
    alpha : float, optional
        Significance threshold applied to the method's column. Defaults to
        0.05, the value that used to be hard-coded.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. Checked before any
        file is read, instead of failing later on an unbound ``rej_idx``.
    """
    supported = ('scgco', 'spatialde')
    if method not in supported:
        raise ValueError(
            'Unknown method {!r}; expected one of {}'.format(method, supported))

    meth_dir = os.path.join(sim_dir, 'result_{}'.format(method))
    eval_fn = os.path.join(sim_dir, 'eval_{}.csv'.format(method))
    eval_df = pd.DataFrame()

    for i, entry in sim_df.iterrows():
        fn = os.path.join(meth_dir, 'result_{}.csv'.format(i))
        result = load_data_from_file(fn, 'csv')
        if method == 'scgco':
            rej_idx = np.array(result.loc[result['fdr'] < alpha].index).astype(int)
        else:  # 'spatialde' (guaranteed by the check above)
            rej_idx = np.array(result.loc[result['qval'] < alpha].g).astype(int)
        # evaluate for tracking
        new_entry = complex_eval_spatial_sim(entry, rej_idx)
        eval_df = eval_df.append(new_entry)
    save_data_to_file(eval_df, eval_fn, 'csv')
    display(eval_fn)
    # sample(10) raises when the table has fewer than 10 rows; show it all then
    display(eval_df.sample(10) if len(eval_df) > 10 else eval_df)

In [10]:
# Re-score the saved SpatialDE results (read from result_spatialde/) and
# write the evaluation table to eval_spatialde.csv under sim_dir.
recompute_evaluation('spatialde')

'/share/PI/sabatti/feat_viz/space_comp_sim/20191104/eval_spatialde.csv'

Unnamed: 0,FDP,Power,Power-Reg0,Power-Reg1,Power-Reg2,Power-Reg3,seed,temp
29,0.0,0.75,1.0,1.0,1.0,0.0,7.0,diaquad
71,0.0,0.63,1.0,1.0,0.52,0.0,17.0,cosine
75,0.0,0.59,1.0,1.0,0.36,0.0,18.0,cosine
77,0.0,0.75,1.0,1.0,1.0,0.0,19.0,diaquad
64,0.0,0.75,1.0,1.0,1.0,0.0,16.0,linear
24,0.0,0.76,1.0,1.0,1.0,0.04,6.0,linear
67,0.0,0.63,1.0,1.0,0.52,0.0,16.0,cosine
1,0.0,0.77,1.0,1.0,1.0,0.08,0.0,diaquad
17,0.0,0.75,1.0,1.0,1.0,0.0,4.0,diaquad
42,0.0,0.75,1.0,1.0,1.0,0.0,10.0,radial


In [11]:
# Re-score the saved scGCO results (read from result_scgco/) and write the
# evaluation table to eval_scgco.csv under sim_dir.
recompute_evaluation('scgco')

'/share/PI/sabatti/feat_viz/space_comp_sim/20191104/eval_scgco.csv'

Unnamed: 0,FDP,Power,Power-Reg0,Power-Reg1,Power-Reg2,Power-Reg3,seed,temp
27,0.0,0.47,1.0,0.88,0.0,0.0,6.0,cosine
32,0.0,0.6,1.0,1.0,0.4,0.0,8.0,linear
20,0.0,0.57,1.0,1.0,0.28,0.0,5.0,linear
79,0.0,0.48,1.0,0.92,0.0,0.0,19.0,cosine
18,0.0,0.61,1.0,1.0,0.44,0.0,4.0,radial
3,0.0,0.47,1.0,0.88,0.0,0.0,0.0,cosine
58,0.0,0.6,1.0,1.0,0.4,0.0,14.0,radial
62,0.0,0.57,1.0,1.0,0.28,0.0,15.0,radial
74,0.0,0.59,1.0,1.0,0.36,0.0,18.0,radial
34,0.0,0.61,1.0,1.0,0.44,0.0,8.0,radial
