## Import libs, set paths and load params

In [46]:
import os, glob
import numpy as np
import pandas as pd
import sys
sys.path.insert(0, "../src")
import auxilary_functions as f
import subprocess
import csv
import matplotlib.pyplot as plt

# Path of the ecoli configuration file. It is loaded here and is also handed
# to the exploration script as a command-line argument further down.
cfg_file = "../src/config-ecoli.json"
cfg = f.get_actual_parametrization(cfg_file)
networks = ['fflatt']
organisms = ['ecoli']
sizes = ['500']
n_trials = 7

# Parameter grid. Values are strings because they are used both as directory
# names and as command-line arguments.
# Only cascade value '1' is active; the full set considered is '1', '3', '10'.
cascades = ['1']
p2 = ['0.5', '0.7', '0.9']  # 0.2, 0.5, 0.8 (and 0.3?)
p4 = ['0.5', '0.7', '0.9']  # 0.2, 0.5, 0.8 (and 0.3?)

# NOTE: from here on every relative path is resolved against ../networks/.
os.chdir('../networks/')
fflattdir = '../snippets/'
topology_dir = os.path.join(os.getcwd(), 'topology_experiment')

In [3]:
# Collect data: run the exploration script once for every
# (size, cascade, p2, p4) combination that has no output directory yet.
for size in sizes:
    for cascade in cascades:
        for p2_value in p2:
            for p4_value in p4:
                current_dir = os.path.join(topology_dir, size, cascade, p2_value, p4_value)

                # An existing directory is treated as "already computed".
                if os.path.exists(current_dir):
                    continue

                print('making dirs...')
                os.makedirs(current_dir, exist_ok=True)

                print('running fflatt...')
                exit_code = subprocess.call(
                    ['python3', fflattdir + 'parameter_space_exploration.py',
                     cfg_file, size, str(n_trials), current_dir, p2_value, p4_value, cascade])
                if exit_code != 0:
                    # Surface failures: the directory now exists, so this
                    # combination would otherwise be silently skipped forever.
                    print('fflatt exited with code {} for {}; remove this directory to retry'
                          .format(exit_code, current_dir))

making dirs...
running fflatt...


## Display and save z-scores

In [29]:
# Directory receiving one z-score table per analysed network file.
z_score_dir = os.path.join(topology_dir, 'z-scores')
# Create it up front: DataFrame.to_csv does not create missing directories.
os.makedirs(z_score_dir, exist_ok=True)

for size in sizes:
    for cascade in cascades:
        for p2_value in p2:
            for p4_value in p4:
                current_dir = os.path.join(topology_dir, size, cascade, p2_value, p4_value)
                # sorted() keeps the rep numbering stable between runs, so the
                # "already computed" check below always refers to the same file.
                network_files = sorted(glob.glob(os.path.join(current_dir, '*sv')))
                for rep, file in enumerate(network_files):
                    z_score_file = os.path.join(
                        z_score_dir,
                        '_'.join([size, cascade, p2_value, p4_value, str(rep)]) + '_z_score.tsv')
                    # Skip replicates whose table has already been written.
                    if os.path.exists(z_score_file):
                        continue

                    print(rep, file)
                    report = f.analyze_exctracted_network(cfg, file, p2_value, rep, size, stability_motifs=True)
                    # The previous code averaged a one-element list, which only
                    # produced a float-valued copy of the report; do that directly.
                    # Assumes `report` is a numeric DataFrame (the earlier
                    # arithmetic required the same) - TODO confirm.
                    z_scores = report.astype(float)
                    z_scores['size'] = size
                    z_scores['p2_value'] = p2_value
                    z_scores['p4_value'] = p4_value
                    z_scores['cascade_value'] = cascade
                    z_scores['rep_num'] = rep
                    print(z_scores)
                    # Written with the default comma separator despite the .tsv name.
                    z_scores.to_csv(z_score_file)

In [30]:
#df_topo

## Group-by z-scores and save as table

In [34]:
# Read every saved z-score table back in and collect the frames in a list.
zscore_stats_lst = []
# sorted() gives a reproducible row order in the concatenated table.
for file in sorted(glob.glob(os.path.join(topology_dir, 'z-scores', '*.tsv'))):
    # The tables are comma-separated; the stored header row is skipped and
    # replaced by the explicit column names assigned below.
    zscore_stats_df = pd.read_csv(file, sep=",", index_col=0, header=None, skiprows=1)
    # Keep the index labels (motif names) as a regular column so they survive
    # the index reset performed after concatenation.
    zscore_stats_df['motif'] = zscore_stats_df.index
    zscore_stats_df.columns = ['counts_ori', 'counts_rand', 'sd_rand',
                               'z-score', 'p-val', 'size', 'p2', 'p4',
                               'cascades', 'rep_num', 'motif']
    print(zscore_stats_df)
    zscore_stats_lst.append(zscore_stats_df)

      counts_ori  counts_rand     sd_rand   z-score  p-val  size   p2   p4  \
0                                                                            
021C      6447.0      7274.61  627.674373 -1.318534   0.08   500  0.5  0.5   
021D     38996.0     40273.45  572.520679 -2.231273   0.00   500  0.5  0.5   
021U      1589.0      1679.20   76.459532 -1.179709   0.08   500  0.5  0.5   
030C        26.0        48.96   11.370945 -2.019181   0.00   500  0.5  0.5   
030T       972.0       559.01   60.361162  6.841982   0.00   500  0.5  0.5   

      cascades  rep_num motif  
0                              
021C        10        0  021C  
021D        10        0  021D  
021U        10        0  021U  
030C        10        0  030C  
030T        10        0  030T  
      counts_ori  counts_rand     sd_rand   z-score  p-val  size   p2   p4  \
0                                                                            
021C      6530.0      6255.71  396.960131  0.690976   0.23   500  0.5  0.

      counts_ori  counts_rand     sd_rand   z-score  p-val  size   p2   p4  \
0                                                                            
021C      2082.0      6285.17  685.027154 -6.135771   0.00   500  0.9  0.5   
021D     59793.0     63876.01  653.941091 -6.243697   0.00   500  0.9  0.5   
021U       716.0       802.34   40.361174 -2.139185   0.00   500  0.9  0.5   
030C         0.0        15.84    4.795248 -3.303270   0.00   500  0.9  0.5   
030T       279.0       349.73   49.762005 -1.421366   0.03   500  0.9  0.5   

      cascades  rep_num motif  
0                              
021C         1        1  021C  
021D         1        1  021D  
021U         1        1  021U  
030C         1        1  030C  
030T         1        1  030T  
      counts_ori  counts_rand     sd_rand   z-score  p-val  size   p2   p4  \
0                                                                            
021C      1489.0      5076.46  735.215988 -4.879464   0.00   500  0.9  0.

In [41]:
# Stack all per-file tables into one frame with a fresh 0..n-1 index, keep
# only the rows with cascades == 1, then drop the now-constant column.
# `columns=` is used because pandas 2.0 no longer accepts the axis as a
# positional argument to DataFrame.drop.
zscore_stats_df = (
    pd.concat(zscore_stats_lst, ignore_index=True)
    .loc[lambda frame: frame['cascades'] == 1]
    .drop(columns='cascades')
)
zscore_stats_df

Unnamed: 0,counts_ori,counts_rand,sd_rand,z-score,p-val,size,p2,p4,rep_num,motif
135,3430.0,4690.85,428.616224,-2.941676,0.01,500,0.5,0.5,0,021C
136,40134.0,41610.85,397.542837,-3.714946,0.00,500,0.5,0.5,0,021D
137,682.0,852.44,46.708526,-3.649013,0.00,500,0.5,0.5,0,021U
138,12.0,33.37,8.985160,-2.378366,0.00,500,0.5,0.5,0,030C
139,769.0,509.62,53.107020,4.884100,0.00,500,0.5,0.5,0,030T
...,...,...,...,...,...,...,...,...,...,...
265,2255.0,4984.24,568.809127,-4.798165,0.00,500,0.9,0.9,2,021C
266,53966.0,56679.44,543.148770,-4.995758,0.00,500,0.9,0.9,2,021D
267,752.0,774.84,33.630260,-0.679150,0.26,500,0.9,0.9,2,021U
268,0.0,13.50,4.916299,-2.745968,0.00,500,0.9,0.9,2,030C


In [43]:
# Mean over all rows sharing (p2, p4, motif); the z-score column is then
# pivoted so that motifs become columns, and rounded to three decimals.
grouped_mean = zscore_stats_df.groupby(['p2', 'p4', 'motif']).mean()
zscore_stats_df_mean = grouped_mean['z-score'].unstack().round(3)
zscore_stats_df_mean

Unnamed: 0_level_0,motif,021C,021D,021U,030C,030T
p2,p4,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,0.5,-2.26,-2.991,-3.666,-2.967,4.6
0.5,0.7,-2.484,-3.04,-4.534,-2.796,5.393
0.5,0.9,-2.078,-2.544,-4.354,-2.343,5.306
0.7,0.5,-4.129,-4.488,-2.035,-3.002,3.428
0.7,0.7,-2.483,-2.754,-2.254,-2.433,3.925
0.7,0.9,-4.011,-4.164,-2.755,-2.976,0.682
0.9,0.5,-5.465,-5.542,-1.894,-2.881,-1.12
0.9,0.7,-3.63,-3.855,-0.744,-3.066,2.367
0.9,0.9,-4.56,-4.708,-0.857,-2.761,1.192


In [44]:
# Standard deviation over all rows sharing (p2, p4, motif); the z-score
# column is then pivoted so that motifs become columns, and rounded to
# three decimals.
grouped_std = zscore_stats_df.groupby(['p2', 'p4', 'motif']).std()
zscore_stats_df_std = grouped_std['z-score'].unstack().round(3)
zscore_stats_df_std

Unnamed: 0_level_0,motif,021C,021D,021U,030C,030T
p2,p4,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.5,0.5,1.563,1.546,0.401,0.709,0.981
0.5,0.7,0.367,0.293,0.545,0.321,1.653
0.5,0.9,1.431,1.22,1.837,0.351,1.46
0.7,0.5,0.106,0.241,0.452,0.086,0.833
0.7,0.7,0.599,0.527,0.174,0.559,1.67
0.7,0.9,1.336,1.276,0.529,0.369,1.331
0.9,0.5,0.632,0.655,0.214,0.366,0.314
0.9,0.7,0.541,0.574,0.091,0.242,0.471
0.9,0.9,0.231,0.266,0.161,0.376,0.211


In [47]:
# Join the two tables cell by cell into "<mean>±<std>" strings.
mean_as_text = zscore_stats_df_mean.astype(str)
std_as_text = zscore_stats_df_std.astype(str)
final_table_s2 = mean_as_text + "\u00B1" + std_as_text

In [48]:
# Save the combined table as a tab-separated file. The relative file name is
# resolved against the current working directory.
final_table_s2.to_csv("s2_table.csv", sep="\t")