In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.display import display
import sys
import os
import logging

# Put the sibling ../src directory first on the import path (presumably where
# io_utils, main_methods, sim_utils and general_utils live -- TODO confirm).
sys.path.insert(0,'../src')
import matplotlib as mpl
# Default resolution for figures created in this notebook.
mpl.rcParams['figure.dpi']= 100

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
# Display the pandas version available in this kernel.
pd.__version__

'0.23.4'

In [6]:
from io_utils import save_data_to_file, load_data_from_file
from main_methods import select_spatial_genes
from sim_utils import evaluate_rejections
from general_utils import norm_mtx

def read_trial_data(sim_dir, entry):
    """Load the location table and the expression table of one trial.

    Parameters
    ----------
    sim_dir : str
        Directory that the file names stored in ``entry`` are relative to.
    entry : mapping
        Must provide the keys ``'locs_fn'`` and ``'expr_fn'``.

    Returns
    -------
    tuple
        ``(locs, data)``: whatever ``load_data_from_file`` returns for the
        two paths, both read with the ``'csv'`` format.
    """
    def _load(key):
        # Resolve the file name stored under `key` against sim_dir, then read it.
        return load_data_from_file(os.path.join(sim_dir, entry[key]), 'csv')

    # Tuple elements are evaluated left to right: locations first, expression second.
    return _load('locs_fn'), _load('expr_fn')
    
# Simulation batch to evaluate.
# Alternative batch: '/scratch/PI/sabatti/space_comp_sim/20191030'
sim_dir = '/scratch/PI/sabatti/space_comp_sim/20191104'
sim_fn = os.path.join(sim_dir, 'sim_setup.csv')
sim_df = load_data_from_file(sim_fn, 'csv')
display(sim_df.head())

# Per-trial results are written to <sim_dir>/result_ours. With cache=True an
# existing result file is reused instead of re-running the selection.
meth_dir = 'result_ours'
cache = False

meth_dir = os.path.join(sim_dir, meth_dir)
os.makedirs(meth_dir, exist_ok=True)

eval_fn = 'eval_ours.csv'
eval_fn = os.path.join(sim_dir, eval_fn)

# Arguments forwarded to select_spatial_genes.
alpha = 0.05
knn = 8
n_perm = 10000

# One pd.Series of evaluation metrics per trial; the frame is built once after
# the loop (DataFrame.append copies the frame on every call and was removed in
# pandas 2.0).
eval_rows = []

for i, entry in sim_df.iterrows():
    locs, data_norm = read_trial_data(sim_dir, entry)
    data_norm = norm_mtx(data_norm)
    assert locs.shape[0] == data_norm.shape[0], 'Mismatch samples'

    fn = os.path.join(meth_dir, 'result_{}.csv'.format(i))
    if cache and os.path.exists(fn):
        result = load_data_from_file(fn, 'csv')
        # Recover the rejections as integer positions, matching what the
        # non-cached branch produces. Taking the index labels of the rejected
        # rows only agrees with positions when the labels happen to be
        # 0..n-1 integers.
        rej_idx = np.flatnonzero(result['reject'].values)
    else:
        pvals, rej_idx = select_spatial_genes(locs,
                                              data_norm.values,
                                              knn=knn,
                                              alpha=alpha,
                                              n_perm=n_perm)
        # Boolean mask over all tested columns, True where rejected.
        reject = np.zeros(len(pvals), dtype=bool)
        reject[rej_idx] = True
        result = pd.DataFrame({'pvalue': pvals, 'reject': reject},
                              index=data_norm.columns)
        # save result to file
        save_data_to_file(result, fn, 'csv')

    # evaluate for tracking
    # Reference set is positions 0 .. n_per_reg * n_regs - 1 (presumably the
    # columns simulated with a spatial pattern -- TODO confirm against the
    # simulation setup).
    nn_idx = np.arange(entry['n_per_reg'] * entry['n_regs'])
    evals = evaluate_rejections(set(rej_idx), set(nn_idx))
    print('{}-{}: Power: {:.4f}, FDP: {:.4f}'.format(entry['temp'], entry['seed'],
                                              evals['Power'], evals['FDP']))

    eval_rows.append(pd.Series(evals, name=i))

# Rows are indexed by the sim_df row label of each trial.
eval_df = pd.DataFrame(eval_rows)
save_data_to_file(eval_df, eval_fn, 'csv')

Unnamed: 0,seed,temp,n_per_reg,n_regs,locs_fn,expr_fn
0,0,linear,25,4,locs_2d.csv,sim_data/mtx_linear_0.csv
1,0,diaquad,25,4,locs_2d.csv,sim_data/mtx_diaquad_0.csv
2,0,radial,25,4,locs_2d.csv,sim_data/mtx_radial_0.csv
3,0,cosine,25,4,locs_2d.csv,sim_data/mtx_cosine_0.csv
4,1,linear,25,4,locs_2d.csv,sim_data/mtx_linear_1.csv


linear-0: Power: 0.7700, FDP: 0.0375
diaquad-0: Power: 0.8000, FDP: 0.0123
radial-0: Power: 0.7800, FDP: 0.0488
cosine-0: Power: 0.7500, FDP: 0.0506
linear-1: Power: 0.7600, FDP: 0.0130
diaquad-1: Power: 0.8100, FDP: 0.0581
radial-1: Power: 0.7800, FDP: 0.0127
cosine-1: Power: 0.7700, FDP: 0.0375
linear-2: Power: 0.8100, FDP: 0.0241
diaquad-2: Power: 0.7900, FDP: 0.0366
radial-2: Power: 0.7800, FDP: 0.0250
cosine-2: Power: 0.7500, FDP: 0.0132
linear-3: Power: 0.7600, FDP: 0.0380
diaquad-3: Power: 0.7500, FDP: 0.0506
radial-3: Power: 0.8000, FDP: 0.0698
cosine-3: Power: 0.7500, FDP: 0.0000
linear-4: Power: 0.7900, FDP: 0.0595
diaquad-4: Power: 0.7800, FDP: 0.0250
radial-4: Power: 0.7500, FDP: 0.0132
cosine-4: Power: 0.7500, FDP: 0.0260
linear-5: Power: 0.7900, FDP: 0.0000
diaquad-5: Power: 0.7500, FDP: 0.0132
radial-5: Power: 0.7600, FDP: 0.0256
cosine-5: Power: 0.7500, FDP: 0.0000
linear-6: Power: 0.7900, FDP: 0.0125
diaquad-6: Power: 0.8200, FDP: 0.0353
radial-6: Power: 0.7700, FDP: 0

In [11]:
from io_utils import save_data_to_file, load_data_from_file
from main_methods import select_spatial_genes
from sim_utils import evaluate_rejections
from general_utils import norm_mtx

def read_trial_data(sim_dir, entry):
    """Load the location table and the expression table of one trial.

    Parameters
    ----------
    sim_dir : str
        Directory that the file names stored in ``entry`` are relative to.
    entry : mapping
        Must provide the keys ``'locs_fn'`` and ``'expr_fn'``.

    Returns
    -------
    tuple
        ``(locs, data)``: whatever ``load_data_from_file`` returns for the
        two paths, both read with the ``'csv'`` format.
    """
    def _load(key):
        # Resolve the file name stored under `key` against sim_dir, then read it.
        return load_data_from_file(os.path.join(sim_dir, entry[key]), 'csv')

    # Tuple elements are evaluated left to right: locations first, expression second.
    return _load('locs_fn'), _load('expr_fn')
    
# Simulation batch to evaluate.
sim_dir = '/scratch/PI/sabatti/space_comp_sim/20191030'
sim_fn = os.path.join(sim_dir, 'sim_setup.csv')
sim_df = load_data_from_file(sim_fn, 'csv')
display(sim_df.head())

# Per-trial results are written to <sim_dir>/result_ours. With cache=True an
# existing result file is reused instead of re-running the selection.
meth_dir = 'result_ours'
cache = True

meth_dir = os.path.join(sim_dir, meth_dir)
os.makedirs(meth_dir, exist_ok=True)

eval_fn = 'eval_ours.csv'
eval_fn = os.path.join(sim_dir, eval_fn)

# Arguments forwarded to select_spatial_genes.
alpha = 0.05
knn = 8
n_perm = 10000

# One pd.Series of evaluation metrics per trial; the frame is built once after
# the loop (DataFrame.append copies the frame on every call and was removed in
# pandas 2.0).
eval_rows = []

for i, entry in sim_df.iterrows():
    locs, data_norm = read_trial_data(sim_dir, entry)
    data_norm = norm_mtx(data_norm)
    assert locs.shape[0] == data_norm.shape[0], 'Mismatch samples'

    fn = os.path.join(meth_dir, 'result_{}.csv'.format(i))
    if cache and os.path.exists(fn):
        result = load_data_from_file(fn, 'csv')
        # Recover the rejections as integer positions, matching what the
        # non-cached branch produces. Taking the index labels of the rejected
        # rows only agrees with positions when the labels happen to be
        # 0..n-1 integers.
        rej_idx = np.flatnonzero(result['reject'].values)
    else:
        pvals, rej_idx = select_spatial_genes(locs,
                                              data_norm.values,
                                              knn=knn,
                                              alpha=alpha,
                                              n_perm=n_perm)
        # Boolean mask over all tested columns, True where rejected.
        reject = np.zeros(len(pvals), dtype=bool)
        reject[rej_idx] = True
        result = pd.DataFrame({'pvalue': pvals, 'reject': reject},
                              index=data_norm.columns)
        # save result to file
        save_data_to_file(result, fn, 'csv')

    # evaluate for tracking
    # Reference set is positions 0 .. n_per_reg * n_regs - 1 (presumably the
    # columns simulated with a spatial pattern -- TODO confirm against the
    # simulation setup).
    nn_idx = np.arange(entry['n_per_reg'] * entry['n_regs'])
    evals = evaluate_rejections(set(rej_idx), set(nn_idx))
    print('{}-{}: Power: {:.4f}, FDP: {:.4f}'.format(entry['temp'], entry['seed'],
                                              evals['Power'], evals['FDP']))

    eval_rows.append(pd.Series(evals, name=i))

# Rows are indexed by the sim_df row label of each trial.
eval_df = pd.DataFrame(eval_rows)
save_data_to_file(eval_df, eval_fn, 'csv')

Unnamed: 0,seed,temp,n_per_reg,n_regs,locs_fn,expr_fn
0,0,linear,25,4,locs_2d.csv,sim_data/mtx_linear_0.csv
1,0,diaquad,25,4,locs_2d.csv,sim_data/mtx_diaquad_0.csv
2,0,radial,25,4,locs_2d.csv,sim_data/mtx_radial_0.csv
3,0,cosine,25,4,locs_2d.csv,sim_data/mtx_cosine_0.csv
4,1,linear,25,4,locs_2d.csv,sim_data/mtx_linear_1.csv


linear-0: Power: 1.0000, FDP: 0.0291
diaquad-0: Power: 0.9700, FDP: 0.0300
radial-0: Power: 1.0000, FDP: 0.0196
cosine-0: Power: 0.7600, FDP: 0.0617
linear-1: Power: 1.0000, FDP: 0.0099
diaquad-1: Power: 0.9500, FDP: 0.0306
radial-1: Power: 1.0000, FDP: 0.0099
cosine-1: Power: 0.7500, FDP: 0.0625
linear-2: Power: 1.0000, FDP: 0.0196
diaquad-2: Power: 0.9900, FDP: 0.0000
radial-2: Power: 1.0000, FDP: 0.0385
cosine-2: Power: 0.7600, FDP: 0.0256
linear-3: Power: 1.0000, FDP: 0.0196
diaquad-3: Power: 0.9600, FDP: 0.0303
radial-3: Power: 0.9900, FDP: 0.0198
cosine-3: Power: 0.7500, FDP: 0.0506
linear-4: Power: 1.0000, FDP: 0.0196
diaquad-4: Power: 0.9400, FDP: 0.0309
radial-4: Power: 0.9900, FDP: 0.0198
cosine-4: Power: 0.7500, FDP: 0.0506
linear-5: Power: 1.0000, FDP: 0.0196
diaquad-5: Power: 0.9700, FDP: 0.0300
radial-5: Power: 1.0000, FDP: 0.0099
cosine-5: Power: 0.7600, FDP: 0.0130
linear-6: Power: 1.0000, FDP: 0.0476
diaquad-6: Power: 0.9800, FDP: 0.0297
radial-6: Power: 1.0000, FDP: 0

In [6]:
# Shape of data_norm as left behind by the last evaluation-loop iteration run in this kernel.
data_norm.shape

(1045, 200)

In [7]:
# Shape of locs as left behind by the last evaluation-loop iteration run in this kernel.
locs.shape

(1045, 2)