# In [11]:  (Jupyter notebook cell prompt; kept as a comment so the file parses as Python)
import networkx as nx
import pandas as pd
import numpy as np
import os

def read_in_network(fn='input_files/generegulation_tmp.txt', skiprows=11):
    """Load the gene-regulation table and name its columns from the file header.

    The file is expected to start with a block of ``#`` comment lines.  Every
    header line that contains ``') '`` (e.g. ``# (1) GENE_ID_REGULATOR``)
    contributes one column name: the text following the first ``') '``.

    Parameters
    ----------
    fn : str
        Path of the tab-separated input file.
    skiprows : int
        Number of leading lines pandas skips before the data starts.  The
        default (11) matches the header length this script was written for.

    Returns
    -------
    pandas.DataFrame
        The data rows, with columns named after the header entries.

    Raises
    ------
    ValueError
        If the number of column names found in the header differs from the
        number of columns in the data.
    """
    cols = []
    with open(fn, 'r') as fh:
        for raw in fh:
            ln = raw.strip()
            # The header ends at the first line that is not a comment.
            if not ln.startswith('#'):
                break
            if ') ' in ln:
                cols.append(ln.split(') ')[1])

    tab = pd.read_table(fn, skiprows=skiprows, header=None)
    if len(cols) != tab.shape[1]:
        raise ValueError(
            'header of %r names %d columns but the data has %d'
            % (fn, len(cols), tab.shape[1])
        )
    tab.columns = cols
    return tab

def simplify(G):
    """Return the subgraph of ``G`` without its sink nodes.

    A node is kept only if its out-degree in ``G`` is non-zero.  Removing the
    sinks can turn their predecessors into new sinks, which is why callers
    apply this function repeatedly.

    Parameters
    ----------
    G : graph object providing ``nodes``, ``out_degree(node)`` and
        ``subgraph(nodes)`` (a networkx directed graph in this script).

    Returns
    -------
    The result of ``G.subgraph`` on the kept nodes.

    Side effects
    ------------
    Prints the number of kept nodes.
    """
    # Only the out-degree decides whether a node survives; the in-degree the
    # original code also looked up was never used.
    simplified_nodes = [node for node in G.nodes if G.out_degree(node) != 0]
    G_simplified = G.subgraph(simplified_nodes)
    print(len(simplified_nodes))
    return G_simplified

def main():
    """Build the signed regulatory network and export its simplified core.

    Steps:

    1. Read the regulation table and keep only interactions whose function is
       ``'activator'`` (edge weight ``+1``) or ``'repressor'`` (weight ``-1``).
    2. Build a directed graph regulator -> regulated gene.
    3. Find the node from which the largest number of nodes (itself included)
       can be reached, and print the size of that reachable set.
    4. Strip sink nodes three times from both the largest weakly connected
       component and the reachable set (each ``simplify`` call prints the
       number of nodes it keeps).
    5. Write the simplified reachable subgraph to ``rs2.gml``.

    Raises
    ------
    ValueError
        If the table contains no activator/repressor interaction, i.e. the
        graph is empty.
    """
    #df = pd.read_excel('raw.xlsx',engine='openpyxl',sheet_name='Sheet1')
    df = read_in_network()
    df_sc = df.loc[:, ['GENE_NAME_REGULATOR', 'GENE_NAME_REGULATED', 'GENEREGULATION_FUNCTION']]
    sdef_edges = df_sc[df_sc.GENEREGULATION_FUNCTION.isin(['activator', 'repressor'])]

    G = nx.DiGraph()
    G.add_weighted_edges_from(
        (regulator, regulated, 1 if function == 'activator' else -1)
        for regulator, regulated, function in sdef_edges.itertuples(index=False, name=None)
    )
    if G.number_of_nodes() == 0:
        raise ValueError('no activator/repressor interactions found; the regulatory graph is empty')

    # Weakly connected components, largest first.
    wccs = sorted(nx.weakly_connected_components(G), key=len, reverse=True)

    # For every node collect the set of nodes reachable from it (one graph
    # traversal per node).  The source itself counts as reached exactly once.
    # The strict '>' keeps the first node, in graph order, among ties.
    reached_most = set()
    for source in G.nodes:
        reached = nx.descendants(G, source)
        reached.add(source)
        if len(reached) > len(reached_most):
            reached_most = reached

    print(len(reached_most))

    G_wcc = G.subgraph(wccs[0])
    G_reach = G.subgraph(reached_most)

    # The simplified weak component is not used further; the calls are kept
    # for the node counts they print.
    simplify(simplify(simplify(G_wcc)))
    G_rs2 = simplify(simplify(simplify(G_reach)))

    # write_gml takes the graph first and the path second.
    nx.write_gml(G_rs2, 'rs2.gml')
    
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

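# Pasted notebook output (presumably from an earlier version of the cell: the
# path below lacks the 'input_files/' prefix that read_in_network uses now):
# FileNotFoundError: [Errno 2] No such file or directory: 'generegulation_tmp.txt'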

Unnamed: 0,GENE_ID_REGULATOR,GENE_NAME_REGULATOR,TF_ID_REGULATOR,TRANSCRIPTION_FACTOR_NAME,TF_CONFORMATION,CONFORMATION_STATUS,GENE_ID_REGULATED,GENE_NAME_REGULATED,GENEREGULATION_FUNCTION
0,ECK120000017,cpxR,ECK120011210,CpxR,CpxR-Phosphorylated,Pi,ECK120000075,aroG,activator
1,ECK120000017,cpxR,ECK120011210,CpxR,CpxR-Phosphorylated,Pi,ECK120004163,mzrA,activator
2,ECK120000017,cpxR,ECK120011210,CpxR,CpxR-Phosphorylated,Pi,ECK120000939,slt,activator
3,ECK120000017,cpxR,ECK120011210,CpxR,CpxR-Phosphorylated,Pi,ECK120000917,sbmA,activator
4,ECK120000017,cpxR,ECK120011210,CpxR,CpxR-Phosphorylated,Pi,ECK120001023,tsr,repressor
...,...,...,...,...,...,...,...,...,...
10233,ECK120004441,bglJ,ECK120011324,RcsB,RcsB-P<sup>asp56</sup>,Pi,ECK120000672,osmB,activator
10234,ECK120004441,bglJ,ECK120011324,RcsB,RcsB-P<sup>asp56</sup>,Pi,ECK120002216,rseA,activator
10235,ECK120004441,bglJ,ECK125257193,RcsB-BglJ,RcsB-BglJ,Pi,ECK120003323,ynbD,activator
10236,ECK120004441,bglJ,ECK120011324,RcsB,RcsB-P<sup>asp56</sup>,Pi,ECK120001366,yhiD,activator
