In [67]:
import networkExpansionPy.lib as ne
from networkExpansionPy.folds import FoldRules,fold_expansion
import numpy as np
from random import sample
import pandas as pd


metabolism = ne.GlobalMetabolicNetwork()
metabolism.convertToIrreversible()

# remove all O2 dependent reactions
oxygen_dependent_rxns = metabolism.network[metabolism.network.cid.isin(['C00007'])].rn.unique().tolist()
o2_independent_rxns = [x for x in metabolism.network.rn.unique().tolist() if x not in oxygen_dependent_rxns]

# only keep anaerobic reactions
metabolism.subnetwork(o2_independent_rxns)

# define seed compounds
cpds = pd.read_csv('/Users/joshuagoldford/Documents/github/networkExpansionPy/networkExpansionPy/assets/compounds/seeds.csv')
cpds['CID'] = cpds['CID'].apply(lambda x: x.strip())
seed_set = cpds['CID'].unique().tolist()


# define fold rules
fold_rules = FoldRules()
fold_rules.setRules()



In [68]:
# determine scope of folds (reaction sets that involve the fold)
fold_dict = {}
for fold in fold_rules.folds:
    foldset = set([fold])
    rns = fold_rules.rules[fold_rules.rules.fold_sets.apply(lambda x: foldset.issubset(x))].rn.unique().tolist()
    fold_dict[fold] = rns
    
    
# if the fold cover reactions that are not in the metabolic netowrk, do not use fold in iterative fold expansion code
mrxns = metabolism.network.rn.unique().tolist()
fold_remove = []
for key,values in fold_dict.items():
    if all([x not in mrxns for x in values]):
        fold_remove.append(key)

In [69]:
# remove all rules with folds that are not used in metabolic network or are erroneous
fold_remove = fold_remove + ['PDBChainNotFound']
# remove folds that are not included in metabolic network at all
fold_rules.removeFolds(fold_remove)


False

In [99]:
cpd_set = seed_set
fold_set = set(['2002'])

In [100]:
c,re,rf = fold_expansion(metabolism,fold_rules,fold_set,cpd_set,rxn_set)

  x,y = netExp(R,P,x0,b)


In [101]:
len(c)

642

In [108]:
metabolism.compounds[metabolism.compounds.cid.isin(c)].iloc[350:400]

Unnamed: 0,cid,formula,name,num_rgroup,electrons
1564,C01879,C5H7NO3,Pidolic acid; 5-Oxoproline; Pyroglutamic acid;...,0,68.0
1607,C01935,(C12H20O10)n,Maltodextrin; Maltodextrin(n); Maltodextrin(n-...,0,0.0
1648,C01990,C6H6O8,3-Oxalomalate; 3-Oxalomalic acid,0,106.0
1652,C01996,C7H16NO2,Acetylcholine; O-Acetylcholine,0,80.0
1666,C02013,C24H42O21,Cellotetraose,0,354.0
1692,C02048,C12H22O11,Laminaribiose; Laminariaceae; 3-beta-D-Glucosy...,0,182.0
1707,C02067,C9H12N2O6,Pseudouridine,0,128.0
1713,C02076,C7H14O7,Sedoheptulose; D-altro-Heptulose; D-Sedoheptul...,0,112.0
1731,C02097,C16H26N2O16P2,dTDP-galactose; dTDP-D-galactose,0,294.0
1739,C02107,C4H6O6,"(S,S)-Tartaric acid; (S,S)-Tartrate; D-Tartrat...",0,78.0


In [64]:
# define a random permutation on the folds
folds = list(fold_rules.folds)
idx = np.random.permutation(len(folds))
fold_order = [folds[i] for i in idx]


In [51]:
fold_set = set();
cpd_set = seed_set
rxn_set = set();
iteration = 0;
cpds_iteration = {'cid': list(seed_set), 'iteration' : [iteration for x in seed_set]}
rxns_iteration = {'rn': list(rxn_set) , 'iteration' : [iteration for x in rxn_set]}


for fold in fold_order:
    
    iteration = iteration + 1;
    fold_set = fold_set.union(set([fold]))
    c,re,rf = fold_expansion(metabolism,fold_rules,fold_set,cpd_set,rxn_set)
    c = set(c); re = set(re); rf = set(rf)
    
    c_new = [x for x in c if x not in cpd_set]
    re_new = [x for x in re if x not in rxn_set]
    
    c_iter = [iteration for x in c_new]
    r_iter = [iteration for x in re_new]
    cpds_iteration['cid'] = cpds_iteration['cid'] + c_new
    cpds_iteration['iteration'] = cpds_iteration['iteration'] + c_iter
    rxns_iteration['rn'] = rxns_iteration['rn'] + re_new
    rxns_iteration['iteration'] = rxns_iteration['iteration'] + r_iter
    
    # now add seed sets and reaction sets for next iteration
    cpd_set = set(cpd_set).union(c_new)
    rxn_set = rxn_set.union(re_new)

cpd_df = pd.DataFrame(cpds_iteration)
rxn_df = pd.DataFrame(rxns_iteration)
fold_df = pd.DataFrame({'iteration': list(range(len(fold_order))), 'fold': fold_order})

# join cpd table to fold table
cpd_df = cpd_df.set_index('iteration').join(fold_df.set_index('iteration')).reset_index()
rxn_df = rxn_df.set_index('iteration').join(fold_df.set_index('iteration')).reset_index()

  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)
  x,y = netExp(R,P,x0,b)


In [111]:
sample([1,2,3],1)[0]

2