In [1]:
import networkx as nx
from community import community_louvain
import random
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd

def generate_com_from_df(df):
    """Group nodes by community from a two-column partition table.

    The columns are read by position, so their labels do not matter: the
    first column holds the node and the second column its community id.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least two columns (node, community).

    Returns
    -------
    dict
        Maps every community id to the list of its nodes, in row order.
    """
    coms = {}
    # Walk the two columns directly instead of using iterrows(): indexing a
    # row Series with node[0] / node[1] is a label lookup (the positional
    # fallback for non-integer labels is deprecated in pandas), and
    # iterrows() coerces each row to a single dtype, which can turn integer
    # community ids into floats.
    for node, com in zip(df.iloc[:, 0], df.iloc[:, 1]):
        coms.setdefault(com, []).append(node)
    return coms

def generate_com_from_dict(dict):
    """Invert a node -> community mapping into community -> list of nodes.

    Nodes are appended in the iteration order of the input mapping, and
    communities appear in the order in which they are first seen.

    Note: the parameter name shadows the built-in ``dict``; it is kept as is
    so existing callers keep working.
    """
    grouped = {}
    for member, label in dict.items():
        # Create the bucket on first sight of a label, then add the member.
        grouped.setdefault(label, []).append(member)
    return grouped

def generate_node_dict(coms=None):
    """Invert a community -> nodes mapping into a node -> community mapping.

    Parameters
    ----------
    coms : dict, optional
        Maps a community id to an iterable of its nodes. When omitted (or
        ``None``) an empty mapping is returned. The previous default was the
        ``dict`` type itself, which made a call without arguments fail inside
        ``dict.items()``.

    Returns
    -------
    dict
        Maps every node to its community id. If a node is listed under
        several communities, the one that comes last in ``coms`` wins.
    """
    if coms is None:
        return {}

    return {node: com for com, members in coms.items() for node in members}

def random_color_generator():
    """Return the name of one CSS4 colour from matplotlib, picked at random."""
    # Iterating the CSS4_COLORS mapping yields its keys, i.e. the colour names.
    css4_names = list(mcolors.CSS4_COLORS)
    return random.choice(css4_names)

def plot_coms(G, coms):
    """Draw ``G`` with every node coloured by the community it belongs to.

    Parameters
    ----------
    G : networkx graph
        Graph to draw.
    coms : dict
        Maps a community id to the collection of nodes in that community.

    Notes
    -----
    * Each community gets a randomly chosen colour, so two communities may
      end up with the same colour.
    * Exactly one colour is produced per node of ``G``: a node listed in
      several communities takes the colour of the first one, and a node
      listed in none is drawn in light grey. Previously such nodes made the
      colour list longer or shorter than the node list.
    """
    # One random colour per community, drawn in the iteration order of coms.
    colour_dict = {com: random_color_generator() for com in coms}

    # Build the node -> colour lookup once instead of scanning every
    # community's member list for every node of the graph.
    node_colour = {}
    for com in coms:
        for node in coms[com]:
            # setdefault keeps the colour of the first community seen.
            node_colour.setdefault(node, colour_dict[com])

    # node_color must line up one-to-one with the nodes in G's iteration order.
    colour_map = [node_colour.get(node, "lightgrey") for node in G]
    nx.draw(G, node_color=colour_map)

In [10]:
# Read the graph from the edge list in "cleaned.txt"; "#" marks comment lines
# and node labels are kept as strings.
G0 = nx.read_edgelist("cleaned.txt",comments="#",nodetype=str)
# Pick the connected component with the most nodes and rebind G0 to the
# subgraph induced by it, so every later cell works on that component only.
largest_cc = max(nx.connected_components(G0),key=len)
G0 = G0.subgraph(largest_cc)

In [11]:
# Load the node -> community assignment from 'partition_louvain.txt':
# space-separated, no header, column 0 = node, column 1 = community id.
partition = pd.read_csv('partition_louvain.txt', delimiter=' ', names=[0, 1])

coms = generate_com_from_df(partition)  # community id -> list of nodes
node_dict = generate_node_dict(coms)    # node -> community id

# Community id of each node of interest.
target = ["YDL090C", 'YJR117W', 'YMR274C']
target_nums = [node_dict[node] for node in target]

# The rest of the notebook analyses "the" community of the targets, so fail
# loudly if they are not all in the same one instead of silently picking the
# smallest id below.
assert len(set(target_nums)) == 1, (
    f"target nodes are spread over several communities: {target_nums}"
)

target_num = np.unique(target_nums)[0]
# Subgraph of the largest component induced by the target community's nodes.
G = G0.subgraph(coms[target_num])
# We add one because the indexing starts at 0 (assumes ids are 0..k-1 without gaps).
number_of_communities = max(node_dict.values()) + 1

In [14]:
# Sanity check: recompute the community id of each target node (same lookup
# as in the cell that builds G) and show it.
target = ["YDL090C", 'YJR117W', 'YMR274C']
target_nums = [node_dict[node] for node in target]
print(target_nums)

# Smallest distinct community id among the targets.
target_com = np.unique(target_nums)[0]

# Size of that community, then the community count computed earlier.
print(len(coms[target_com]))
print(number_of_communities)

[11, 11, 11]
541
20


In [15]:
# Subgraph of G induced by the nodes of the target community.
# NOTE(review): G was itself built from coms[target_num] in an earlier cell and
# target_com is computed the same way as target_num, so H presumably spans the
# same nodes as G — confirm whether this extra subgraph step is needed.
H = G.subgraph(coms[target_com])

In [20]:
# Seed sweep: partition H once per random_state in 0..999 and record the
# modularity of every resulting partition, so scores[i] belongs to seed i.
scores = []
for i in range(1000):
    node_dict2 = community_louvain.best_partition(H, random_state=i)
    coms2 = generate_com_from_dict(node_dict2)
    # Not used inside the loop; it rebinds the notebook-level
    # number_of_communities on every pass.
    number_of_communities = max(node_dict2.values())+1 #We add one because the indexing starts at 0.
    scores.append(nx.community.modularity(H, coms2.values()))
# After the loop, node_dict2, coms2 and number_of_communities hold the values
# from the last seed (999), not from the best-scoring one.

In [28]:
# Highest modularity found in the sweep and the index of its first occurrence
# in scores; that index is the random_state that produced it.
print(max(scores), scores.index(max(scores)))

0.6976535156682518 84


In [29]:
# Re-run the partition with the seed that scored best in the sweep. The seed
# is derived from `scores` rather than copied by hand from the printed output
# (84 in the recorded run), so this cell stays correct if the sweep changes.
best_seed = scores.index(max(scores))
node_dict2 = community_louvain.best_partition(H, random_state=best_seed)
coms2 = generate_com_from_dict(node_dict2)
number_of_communities = max(node_dict2.values()) + 1  # We add one because the indexing starts at 0.

In [30]:
# Modularity on H of the partition currently held in coms2 (shown as the
# cell's output).
nx.community.modularity(H, coms2.values())

0.6976535156682518

In [26]:
# Save the node -> community assignment in node_dict2 as header-less CSV rows
# (node label first, community id second).
# NOTE(review): this cell's recorded execution count (26) is lower than that of
# the cell that recomputes node_dict2 for the selected seed (29), so the file
# on disk may have been written from a different partition — re-run this cell
# after that one to be sure.
node_df = pd.DataFrame.from_dict(node_dict2, orient='index')
# Plain string literal: the previous f-string had no placeholders.
node_df.to_csv('inner_louvain_partition.csv', header=False)