# Community detection

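The classical algorithm used as a model is the Louvain method: https://en.wikipedia.org/wiki/Louvain_method. The task is to minimize the function $Q=\dfrac{1}{2m} \sum_{ij}\left(-A_{ij}+\dfrac{k_ik_j}{2m}\right) \delta(c_i,c_j)$, i.e. the negated modularity, where $A_{ij}$ is the weight of the edge between vertices $i$ and $j$, $k_i$ is the sum of the weights of the edges attached to vertex $i$, $m$ is the sum of all edge weights, and $\delta(c_i,c_j)$ is 1 when vertices $i$ and $j$ are in the same community and 0 otherwise.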

The algorithm solves the community detection problem for an undirected graph $G = (V, E)$ in which every edge $(x, y)$ has a weight $w_{xy}$.

The QUBO has $|V||C|$ binary variables, where $C$ is the set of communities. The maximum number of communities must be defined a priori. Constraints:
1. Each vertex can belong to only one community.
2. Minimize the above-mentioned function.

In [1]:
import numpy as np
import time
import dimod
from dwave.system import DWaveSampler, EmbeddingComposite, LeapHybridSampler
from dwave.samplers import SimulatedAnnealingSampler
import dwave.inspector
import networkx as nx
from networkx.classes.function import path_weight
import random

## QUBO function

In [2]:
def create_qubo2(E, vertices, communities, p):
    """Build a simple edge-based QUBO matrix for community detection.

    The binary variable with index ``v * communities + c`` stands for
    "vertex ``v`` is assigned to community ``c``".

    Args:
        E: Iterable of edges, each indexable as ``(vertex_a, vertex_b, weight)``.
            Vertex ids may be integer-valued floats (e.g. rows of a float
            NumPy array); they are converted to ``int`` before indexing.
        vertices: Number of vertices in the graph.
        communities: Maximum number of communities.
        p: Penalty added when one vertex is assigned to two communities.

    Returns:
        A ``(vertices*communities, vertices*communities)`` NumPy array.
    """
    size = vertices * communities
    Q = np.zeros((size, size))

    # Constraint 1: penalise every pair of distinct communities chosen for
    # the same vertex.
    for v in range(vertices):
        base = v * communities
        for c1 in range(communities):
            for c2 in range(c1 + 1, communities):
                Q[base + c1, base + c2] = p

    # Constraint 2: an edge contributes -weight when both endpoints share a
    # community and +weight when they are placed in different communities.
    for e in E:
        # Rows taken from a float-typed array yield float vertex ids, which
        # NumPy rejects as indices; cast them explicitly.
        src = int(e[0]) * communities
        dst = int(e[1]) * communities
        weight = e[2]
        for c1 in range(communities):
            for c2 in range(communities):
                Q[src + c1, dst + c2] = -weight if c1 == c2 else weight

    return Q

In [3]:
def create_qubo(E, vertices, communities, p):
    """Build the modularity-based QUBO matrix for community detection.

    The binary variable with index ``v * communities + c`` stands for
    "vertex ``v`` is assigned to community ``c``".

    Args:
        E: Iterable of edges, each indexable as ``(vertex_a, vertex_b, weight)``.
            Vertex ids may be integer-valued floats (e.g. rows of a float
            NumPy array); they are converted to ``int`` before indexing.
        vertices: Number of vertices in the graph.
        communities: Maximum number of communities.
        p: Penalty added when one vertex is assigned to two communities.
            The same value is also used as the normaliser in the
            ``k_i * k_j / (2 * p)`` term, so callers are expected to pass
            the total edge weight here.

    Returns:
        A ``(vertices*communities, vertices*communities)`` NumPy array.
    """
    size = vertices * communities
    Q = np.zeros((size, size))

    # Helper data structure: k[v] is the sum of the weights of all edges
    # attached to vertex v.
    k = np.zeros(vertices)
    for e in E:
        # Rows taken from a float-typed array yield float vertex ids, which
        # NumPy rejects as indices; cast them explicitly.
        k[int(e[0])] += e[2]
        k[int(e[1])] += e[2]
    # Kept from the original implementation so existing output is unchanged.
    print(k)

    # Constraint 1: penalise every pair of distinct communities chosen for
    # the same vertex.
    for v in range(vertices):
        base = v * communities
        for c1 in range(communities):
            for c2 in range(c1 + 1, communities):
                Q[base + c1, base + c2] = p

    # Constraints 2 and 3: for every vertex pair placed in the same
    # community add k_i * k_j / (2 * p), rounded to one decimal ...
    for v1 in range(vertices):
        for v2 in range(v1 + 1, vertices):
            coupling = round(k[v1] * k[v2] / (2 * p), 1)
            for c in range(communities):
                Q[v1 * communities + c, v2 * communities + c] = coupling

    # ... and subtract the edge weight for pairs that are actually connected.
    for e in E:
        src = int(e[0]) * communities
        dst = int(e[1]) * communities
        for c in range(communities):
            Q[src + c, dst + c] -= e[2]

    return Q

## Small example

In [4]:
# Weighted undirected edges of the example graph as
# (vertex_a, vertex_b, weight) rows.
E = np.array([
    (0, 2, 1),
    (1, 2, 2),
    (1, 3, 1),
    (0, 5, 8),
    (0, 1, 10),
    (3, 4, 6),
    (2, 4, 16),
])
# Graph size and the maximum number of communities allowed.
vertices, communities = 6, 3

The graph defined above, visualised:

![](graph1.png)

In [5]:
# Sum of all edge weights; later passed to create_qubo as the penalty value.
max_count = sum(e[2] for e in E)
edges = len(E)
print('Max count:',max_count)

# Map each flat QUBO index to its (vertex, community) pair.
labels = {
    vertex * communities + community: (vertex, community)
    for vertex in range(vertices)
    for community in range(communities)
}

Max count: 44


In [6]:
# Build the QUBO matrix and time its construction. perf_counter() is a
# monotonic, high-resolution clock, which makes it better suited to measuring
# millisecond-scale intervals than the wall clock returned by time.time().
ts = time.perf_counter()
Q = create_qubo(E, vertices, communities, max_count)
qubo_time = (time.perf_counter()-ts)*1000
print('Time used for construction Q (ms): {:.3f}\n'.format(qubo_time))
print(labels)
print(Q)

# Wrap the matrix in a binary quadratic model; only the construction is
# timed, the relabelling to (vertex, community) names is not.
ts = time.perf_counter()
bqm = dimod.BinaryQuadraticModel(Q, 'BINARY')
bqm_time = (time.perf_counter()-ts)*1000
bqm = bqm.relabel_variables(labels, inplace=False)

[19. 13. 19.  7. 22.  8.]
Time used for construction Q (ms): 1.545

{0: (0, 0), 1: (0, 1), 2: (0, 2), 3: (1, 0), 4: (1, 1), 5: (1, 2), 6: (2, 0), 7: (2, 1), 8: (2, 2), 9: (3, 0), 10: (3, 1), 11: (3, 2), 12: (4, 0), 13: (4, 1), 14: (4, 2), 15: (5, 0), 16: (5, 1), 17: (5, 2)}
[[  0.   44.   44.   -7.2   0.    0.    3.1   0.    0.    1.5   0.    0.
    4.8   0.    0.   -6.3   0.    0. ]
 [  0.    0.   44.    0.   -7.2   0.    0.    3.1   0.    0.    1.5   0.
    0.    4.8   0.    0.   -6.3   0. ]
 [  0.    0.    0.    0.    0.   -7.2   0.    0.    3.1   0.    0.    1.5
    0.    0.    4.8   0.    0.   -6.3]
 [  0.    0.    0.    0.   44.   44.    0.8   0.    0.    0.    0.    0.
    3.2   0.    0.    1.2   0.    0. ]
 [  0.    0.    0.    0.    0.   44.    0.    0.8   0.    0.    0.    0.
    0.    3.2   0.    0.    1.2   0. ]
 [  0.    0.    0.    0.    0.    0.    0.    0.    0.8   0.    0.    0.
    0.    0.    3.2   0.    0.    1.2]
 [  0.    0.    0.    0.    0.    0.    0.   44.   4

In [7]:
# Solve the model with dimod's ExactSolver and time it. perf_counter() is a
# monotonic, high-resolution clock, which makes it better suited to measuring
# intervals than the wall clock returned by time.time().
ts = time.perf_counter()
sampleset = dimod.ExactSolver().sample(bqm)
det_time = (time.perf_counter()-ts)*1000
print('Time used (ms): {:.3f}\n'.format(det_time))
# Energy of the best (lowest-energy) sample.
energy = sampleset.first.energy
print(sampleset.truncate(20))

Time used (ms): 348.528

   (0, 0) (0, 1) (0, 2) (1, 0) (1, 1) (1, 2) (2, 0) ... (5, 2) energy num_oc.
0       1      0      0      1      0      0      0 ...      0  -26.2       1
1       0      0      1      0      0      1      1 ...      1  -26.2       1
2       0      1      0      0      1      0      1 ...      0  -26.2       1
3       0      0      1      0      0      1      0 ...      1  -26.2       1
4       0      1      0      0      1      0      0 ...      0  -26.2       1
5       1      0      0      1      0      0      0 ...      0  -26.2       1
6       0      1      0      0      1      0      1 ...      0  -23.5       1
7       0      0      1      0      0      1      1 ...      1  -23.5       1
8       0      0      1      0      0      1      1 ...      1  -23.5       1
9       1      0      0      1      0      0      0 ...      0  -23.5       1
10      0      1      0      0      1      0      0 ...      0  -23.5       1
11      0      0      1      0      0  

In [8]:
# Print the labels of the variables set to 1 in the lowest-energy sample.
best_sample = sampleset.first.sample
for label, bit in best_sample.items():
    if bit == 1:
        print(label)

(0, 0)
(1, 0)
(2, 1)
(3, 1)
(4, 1)
(5, 0)
