## Import libraries, set paths and load parameters

In [1]:
import os, glob
import numpy as np
import pandas as pd
import sys
sys.path.insert(0, "../src")
import auxilary_functions as f
import subprocess
import csv
import matplotlib.pyplot as plt

# Path to the JSON configuration. It is kept in a single variable so that the
# loader call below and the generation subprocess (which also receives
# cfg_file) always refer to the same file.
cfg_file = "../src/config-ecoli.json"
cfg = f.get_actual_parametrization(cfg_file)
networks = ['fflatt']
organisms = ['ecoli']
sizes = ['500']
n_trials = 3

# Parameter grid. Values are strings because they are used both as path
# components and as command-line arguments.
# NOTE(review): the earlier trailing comments listed "0.2, 0.5, 0.8 (and 0.3?)"
# for p2/p4, which does not match the values assigned here — confirm which
# grid is intended.
cascades = ['1', '3', '10']
p2 = ['0.5', '0.7', '0.9']
p4 = ['0.5', '0.7', '0.9']

# All remaining paths are resolved relative to the networks directory.
os.chdir('../networks/')
fflattdir = '../snippets/'
# Absolute root under which one sub-directory per parameter combination lives.
topology_dir = os.path.join(os.getcwd(), 'topology_experiment')

In [None]:
# Collect data: run the fflatt generator once for every
# (size, cascade, p2, p4) combination of the parameter grid.
#
# A combination is skipped when its output directory already exists, so
# re-running this cell only generates the combinations that are still missing.
for size in sizes:
    for cascade in cascades:
        for p2_value in p2:
            for p4_value in p4:
                current_dir = os.path.join(topology_dir, size, cascade, p2_value, p4_value)
                if os.path.exists(current_dir):
                    continue

                print('making dirs...')
                os.makedirs(current_dir, exist_ok=True)

                print('running fflatt...')
                exit_code = subprocess.call([
                    'python3', fflattdir + 'parameter_space_exploration.py',
                    cfg_file, size, str(n_trials), current_dir,
                    p2_value, p4_value, cascade,
                ])
                # The directory was created before the run, so a failed run
                # would otherwise be skipped silently on every later execution.
                if exit_code != 0:
                    print('WARNING: fflatt exited with status {} for {}; '
                          'remove this directory to retry'.format(exit_code, current_dir))

making dirs...
running fflatt...


## Display and save z-scores

In [5]:
# Compute the motif report for every generated network file and store one
# table per replicate under <topology_dir>/z-scores. Replicates whose output
# file already exists are skipped, so the cell can be re-run cheaply.
z_score_dir = os.path.join(topology_dir, 'z-scores')
# to_csv does not create missing parent directories.
os.makedirs(z_score_dir, exist_ok=True)

for size in sizes:
    for cascade in cascades:
        for p2_value in p2:
            for p4_value in p4:
                current_dir = os.path.join(topology_dir, size, cascade, p2_value, p4_value)
                # glob returns matches in arbitrary order; sort them so that a
                # given replicate index always refers to the same file and the
                # cache check below stays meaningful between runs.
                network_files = sorted(glob.glob(os.path.join(current_dir, '*sv')))
                for rep, network_file in enumerate(network_files):
                    out_name = '_'.join([size, cascade, p2_value, p4_value, str(rep)]) + '_z_score.tsv'
                    out_path = os.path.join(z_score_dir, out_name)
                    if os.path.exists(out_path):
                        continue

                    print(rep, network_file)
                    # NOTE(review): the third positional argument receives the
                    # p2 value, exactly as before — confirm this is what the
                    # helper expects in that position.
                    report = f.analyze_exctracted_network(cfg, network_file, p2_value, rep, size, stability_motifs=True)
                    print(report)

                    # The previous code averaged a one-element list, which
                    # amounts to a float-typed copy of the report. Working on a
                    # copy keeps `report` itself free of the metadata columns.
                    z_scores = report.astype(float)
                    z_scores['size'] = size
                    z_scores['p2_value'] = p2_value
                    z_scores['p4_value'] = p4_value
                    z_scores['cascade_value'] = cascade
                    z_scores['rep_num'] = rep
                    print(z_scores)
                    # NOTE(review): to_csv writes comma-separated values by
                    # default although the file is named .tsv; left unchanged
                    # so existing readers of these files keep working.
                    z_scores.to_csv(out_path)

0 /home/erik/sweden/sonnhammer/GeneSnake/generation/network_generation_algo/networks/topology_experiment/500/3/0.5/0.8/fflatt_transcriptional_network_1_nodes_500.tsv


100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
100%|██████████| 10/10 [00:09<00:00,  1.03it/s]
100%|██████████| 10/10 [00:09<00:00,  1.05it/s]
100%|██████████| 10/10 [00:09<00:00,  1.04it/s]
100%|██████████| 10/10 [00:09<00:00,  1.05it/s]
100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
100%|██████████| 10/10 [00:10<00:00,  1.07s/it]

       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value
021C   5409.0       5506.47  400.651830 -0.243279     0.37
021D  30197.0      30505.83  367.844615 -0.839566     0.18
021U    609.0        902.25   52.830933 -5.550725     0.00
030C      8.0         41.64   11.621979 -2.894516     0.00
030T    916.0        599.28   53.504968  5.919450     0.00
       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value size p2_value  \
021C   5409.0       5506.47  400.651830 -0.243279     0.37  500      0.5   
021D  30197.0      30505.83  367.844615 -0.839566     0.18  500      0.5   
021U    609.0        902.25   52.830933 -5.550725     0.00  500      0.5   
030C      8.0         41.64   11.621979 -2.894516     0.00  500      0.5   
030T    916.0        599.28   53.504968  5.919450     0.00  500      0.5   

     p4_value cascade_value  
021C      0.8             3  
021D      0.8             3  
021U      0.8             3  
030C      0.8             3  
030T      0.8             3  
1 /home/e


100%|██████████| 10/10 [00:12<00:00,  1.23s/it]
100%|██████████| 10/10 [00:12<00:00,  1.24s/it]
100%|██████████| 10/10 [00:12<00:00,  1.29s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
100%|██████████| 10/10 [00:11<00:00,  1.10s/it]
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
100%|██████████| 10/10 [00:12<00:00,  1.27s/it]
100%|██████████| 10/10 [00:12<00:00,  1.26s/it]

       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value
021C   4133.0       4820.90  483.328801 -1.423255     0.07
021D  46393.0      47345.03  451.494395 -2.108620     0.02
021U    664.0        902.02   46.621879 -5.105328     0.00
030C      2.0         29.04    8.306528 -3.255271     0.00
030T    966.0        599.05   59.919509  6.124049     0.00
       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value size p2_value  \
021C   4133.0       4820.90  483.328801 -1.423255     0.07  500      0.5   
021D  46393.0      47345.03  451.494395 -2.108620     0.02  500      0.5   
021U    664.0        902.02   46.621879 -5.105328     0.00  500      0.5   
030C      2.0         29.04    8.306528 -3.255271     0.00  500      0.5   
030T    966.0        599.05   59.919509  6.124049     0.00  500      0.5   

     p4_value cascade_value  
021C      0.8             3  
021D      0.8             3  
021U      0.8             3  
030C      0.8             3  
030T      0.8             3  
2 /home/e


100%|██████████| 10/10 [00:13<00:00,  1.40s/it]
100%|██████████| 10/10 [00:13<00:00,  1.38s/it]
100%|██████████| 10/10 [00:14<00:00,  1.41s/it]
100%|██████████| 10/10 [00:13<00:00,  1.33s/it]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:12<00:00,  1.28s/it]
100%|██████████| 10/10 [00:12<00:00,  1.21s/it]
100%|██████████| 10/10 [00:12<00:00,  1.23s/it]
100%|██████████| 10/10 [00:13<00:00,  1.37s/it]
100%|██████████| 10/10 [00:13<00:00,  1.34s/it]

       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value
021C   4053.0       5444.35  560.713106 -2.481394     0.01
021D  51603.0      53074.96  531.308082 -2.770445     0.00
021U    719.0        841.90   44.952308 -2.734009     0.00
030C     13.0         23.54    6.263258 -1.682830     0.06
030T    642.0        413.73   49.787118  4.584921     0.00
       N_real  mean(N_rand)  sd(N_rand)   Z-score  P-value size p2_value  \
021C   4053.0       5444.35  560.713106 -2.481394     0.01  500      0.5   
021D  51603.0      53074.96  531.308082 -2.770445     0.00  500      0.5   
021U    719.0        841.90   44.952308 -2.734009     0.00  500      0.5   
030C     13.0         23.54    6.263258 -1.682830     0.06  500      0.5   
030T    642.0        413.73   49.787118  4.584921     0.00  500      0.5   

     p4_value cascade_value  
021C      0.8             3  
021D      0.8             3  
021U      0.8             3  
030C      0.8             3  
030T      0.8             3  





In [5]:
# Display the df_topo table as the cell output.
# NOTE(review): df_topo is not assigned in any of the cells shown above —
# confirm it is created earlier so this cell works on a fresh kernel.
df_topo

Unnamed: 0,ffl-nodes,sparsity,in-degree,out-degree,network,size,rep
0,431,2.35,0.965,1.385,fflatt,500,0
1,656,2.396,0.945,1.451,fflatt,750,0
2,757,2.305,0.976,1.329,fflatt,1000,0
