In [None]:
import networkx as nx
import numpy as np
import matplotlib

In [None]:
## Load the comma-separated edge list into an undirected graph.
## The file is opened in binary mode because read_edgelist decodes each line itself;
## the with-block guarantees the handle is closed once the graph is built.
with open( 'data/network_clustering.csv', 'rb' ) as data:
    network = nx.read_edgelist( data, delimiter = ',', create_using = nx.Graph() )

In [None]:
# quick look at the whole graph, using networkx's default layout and labels
nx.draw_networkx( G = network )

## Community detection

See the [documentation](https://networkx.github.io/documentation/stable/reference/algorithms/community.html) for a full list of various community detection algorithms.

Unlike in R, networkx returns every partition into communities that the algorithm can extract. Therefore, we need to evaluate how good each partition is, e.g., the number of ties within communities plus the number of absent ties between communities, compared with all possible ties. The higher the number, the better the partition -- computationally.

In [None]:
from networkx.algorithms import community

In [None]:
# Walk through every partition produced by Girvan-Newman and report its quality.
# community.performance was deprecated in NetworkX 2.6 and removed in 3.0;
# partition_quality replaces it and returns a (coverage, performance) pair.
# Fall back to the old function only on versions that lack the new one.
for com in community.girvan_newman( network ):
    print( com )
    print( "There are", len( com ), "communities" )
    if hasattr( community, "partition_quality" ):
        _, performance = community.partition_quality( network, com )
    else:
        performance = community.performance( network, com )
    print( performance )

### Using clustering results

The clustering code provides, for each partition, a tuple of node sets -- one set per community -- so we know where each node belongs.

You may even use more advanced tricks for [plotting](https://stackoverflow.com/questions/43541376/how-to-draw-communities-with-networkx) communities.

In [None]:
# girvan_newman is a generator: each step removes edges until the graph splits further.
# Only the second partition (index 1) is needed, so advance the generator twice
# instead of materialising the whole hierarchy with list(...).
hierarchy = community.girvan_newman( network )
next( hierarchy, None )  # discard the first partition (index 0)
communities = next( hierarchy, None )
if communities is None:
    # same exception type as indexing past the end of the full list
    raise IndexError( "list index out of range" )
print( communities )

In [None]:
# For coloring, we need to know which cluster each node belongs to.
# Map every node label to the index of its community, then build the color list
# in the same order as network.nodes, because draw_networkx_nodes applies
# node_color positionally to that node order. Indexing the list by int(label) - 1
# would mispaint nodes whenever the graph's node order differs from numeric order,
# and would fail for non-numeric or non-contiguous labels.

membership = { n: i for i, nodes in enumerate( communities ) for n in nodes }
colors = [ membership.get( n, -1 ) for n in network.nodes ]  # -1 marks a node found in no community

pos = nx.spring_layout( network )
nx.draw_networkx_nodes( network, pos, node_color = colors  )
nx.draw_networkx_edges( network, pos )

## Task

* move from the simple example dataset to organization x dataset
* compare more clustering algorithms, do you find differences in results?