In [5]:
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()

[32m[1m  Activating[22m[39m environment at `~/JuliaProjects/Clustering-ToMaTo/Project.toml`


In [13]:
using SparseArrays
using DelimitedFiles

In [14]:
using Revise
using ClusteringToMaTo

In [15]:
"""
    densityG(G, eps=10.0^(-8), maxitr=200, d=0.85)

Return the score vector obtained by running `pagerank` on the transposed
adjacency matrix of `G`, after scaling each column so that its stored entries
sum to one.

# Arguments
- `G`: graph accepted by `adjacency_matrix`.
- `eps`, `maxitr`, `d`: forwarded unchanged to `pagerank` (tolerance, iteration
  cap and damping factor).

# Notes
Columns whose entries sum to zero are left untouched. Dividing such a column by
its sum would evaluate `0/0`, fill the whole column with `NaN` and propagate
`NaN` into every score. As a consequence the scores need not sum to one when
such columns exist.
"""
function densityG(G, eps=10.0^(-8), maxitr=200, d=0.85)
    B = float.(adjacency_matrix(G)')
    weights = nonzeros(B)
    for col in axes(B, 2)
        stored = nzrange(B, col)
        colsum = sum(view(weights, stored))
        # Nothing to normalise (and 0/0 would yield NaN): keep the column as is.
        iszero(colsum) && continue
        # Scale the stored entries in place; the sparsity pattern is unchanged.
        for k in stored
            weights[k] /= colsum
        end
    end
    return pagerank(B, eps, maxitr, d)
end

densityG (generic function with 4 methods)

In [16]:
"""
    sp_powermethod(A::SparseMatrixCSC, maxitr=200)

Run `maxitr` steps of the iteration `u <- (A*u) / sum(A*u)`, starting from the
uniform sparse vector with entries `1/size(A, 1)`, and return the final `u`.

There is no convergence test: exactly `maxitr` steps are performed (none when
`maxitr <= 0`).
"""
function sp_powermethod(A::SparseMatrixCSC, maxitr=200)
    nrows = size(A, 1)
    u = sparse(fill(1 / nrows, nrows))
    remaining = maxitr
    while remaining > 0
        product = A * u
        # Rescale by the entry sum so the iterate keeps summing to one.
        u = product / sum(product)
        remaining -= 1
    end
    return u
end

sp_powermethod (generic function with 2 methods)

In [17]:
"""
    pagerank(A::SparseMatrixCSC, eps=10.0^(-8), maxitr=200, d=0.85)

Iterate `v <- d * (A * v) + (1 - d) / n` with `n = size(A, 2)`, starting from the
uniform vector `1/n`, and return the final iterate as an `n x 1` sparse matrix.

# Arguments
- `A`: square sparse matrix; callers in this file pass a column-normalised
  matrix.
- `eps`: iteration stops once the 2-norm of the change between two successive
  iterates is no longer greater than `eps`.
- `maxitr`: maximum number of iterations.
- `d`: damping factor.

# Notes
The iterate is kept as an `n x 1` sparse matrix from the start, so the returned
shape no longer depends on whether at least one iteration ran.
`norm` is not defined in this file; presumably it is `LinearAlgebra.norm` and
LinearAlgebra is loaded in the session (TODO confirm).
"""
function pagerank(A::SparseMatrixCSC, eps=10.0^(-8), maxitr=200, d=0.85)
    dim = size(A, 2)
    # Constant additive term, built once instead of on every iteration.
    teleport = ((1 - d) / dim) * sparse(ones(dim, 1))
    v = sparse(fill(1 / dim, dim, 1))
    # Infinite sentinel so the first convergence test always passes.
    last_v = sparse(fill(Inf, dim, 1))
    itr = 0
    while itr < maxitr && norm(v - last_v, 2) > eps
        last_v = v
        v = d .* (A * v) + teleport
        itr += 1
    end
    return v
end

pagerank (generic function with 4 methods)

In [18]:
"""
    plotGraphCluster(G, S, u, filename)

Colour the vertices of `G` by cluster, write the drawing to `filename` as a
100cm x 100cm PNG and return `(S, um, gp)`, where `um[i]` is the colour/cluster
index assigned to entry `i` of `u` and `gp` is the plot object.

# Arguments
- `G`: graph handed to `gplot`.
- `S`: collection of clusters; the first element of each cluster is used as the
  cluster's label.
- `u`: per-vertex labels, matched against the cluster labels (presumably the
  second value returned by `ToMaTo.Clustering` — TODO confirm).
- `filename`: output path of the PNG.

# Returns
`(S, um, gp)`, or the sentinel `1` when `S` is empty (nothing is drawn).

# Notes
Labels are translated through a lookup table built from the original labels.
Entries whose label matches no cluster always get the extra index
`size(S, 1) + 1`, which is drawn in black. Previously a label was rewritten in
place and then compared against later cluster labels, and an unmatched label
that happened to be `<= size(S, 1)` was coloured as if it were a cluster.
`RGB`, `gplot`, `draw`, `PNG` and `cm` are not defined in this file; presumably
they come from Colors, GraphPlot and Compose (TODO confirm they are loaded).
"""
function plotGraphCluster(G, S, u, filename)
    nclusters = size(S, 1)
    if nclusters == 0
        return 1
    end

    # Cluster label (first member) -> cluster index; the first cluster wins on ties.
    rootindex = Dict{eltype(eltype(S)),Int}()
    for (j, cluster) in enumerate(S)
        isempty(cluster) && continue
        get!(rootindex, cluster[1], j)
    end

    um = copy(u)
    for i in eachindex(um)
        um[i] = get(rootindex, u[i], nclusters + 1)
    end

    # One random colour per cluster plus black for unmatched entries.
    colors = [RGB(rand(), rand(), rand()) for _ in 1:(nclusters + 1)]
    colors[end] = RGB(0, 0, 0)

    nodefillc = colors[um]
    gp = gplot(G, nodefillc=nodefillc)
    draw(PNG(filename, 100cm, 100cm), gp)
    return S, um, gp
end

plotGraphCluster (generic function with 1 method)

In [19]:
#=
colors=[RGB.(rand(),rand(),rand()) for i=1:43]
colors[end]=RGB.(0,0,0)
membership=A
nodecolor = colors
nodefillc = nodecolor[membership]
draw(PNG("graph.png", 100cm, 100cm), gplot(G,nodefillc=nodefillc))
=#

In [20]:
# Read the delimited edge list; the path is resolved against the current
# working directory.
C = readdlm("citation.txt")

LoadError: ArgumentError: Cannot open 'citation.txt': not a file

In [9]:
# Convert every entry of the edge list to Int (throws if an entry is not integral).
C = map(Int, C)

352807×2 Array{Int64,2}:
    1001  9304045
    1001  9308122
    1001  9309097
    1001  9311042
    1001  9401139
    1001  9404151
    1001  9407087
    1001  9408099
    1001  9501030
    1001  9503124
    1001  9504090
    1001  9504145
    1001  9505025
       ⋮         
 9912260  9704080
 9912260  9902024
 9912260  9908157
 9912270  9306153
 9912286  9707163
 9912286  9802015
 9912286  9802074
 9912286  9805150
 9912286  9806074
 9912286  9808140
 9912286  9810068
 9912286  9901023

In [10]:
# Empty directed graph with one vertex per distinct paper id in C
# (27770 for this data set) instead of a hard-coded vertex count.
G = SimpleDiGraph(length(Set(C)))

{27770, 0} directed simple Int64 graph

In [11]:
# Distinct ids appearing anywhere in the edge list C (either column).
S=Set(C)

Set([9511100, 203011, 108007, 9410150, 9909074, 9908127, 1029, 205205, 9702096, 9502016  …  9401141, 9802019, 1011, 9602001, 9503134, 5152, 9404155, 9412200, 9903136, 9312134])

In [12]:
# Distinct ids as a vector, in the iteration order of the set S.
# NOTE(review): the global name `vertices` collides with the graph function of
# the same name exported by LightGraphs — consider renaming.
vertices = collect(S);

In [13]:
# dict1: vertex index -> paper id; dict2: paper id -> vertex index.
# The index range follows the length of `vertices` instead of a hard-coded 27770.
dict1 = Dict(zip(eachindex(vertices), vertices));
dict2 = Dict(zip(vertices, eachindex(vertices)));

In [14]:
# Insert one directed edge per row of C, translating paper ids to vertex
# indices; iterates over the rows actually present in C.
for i in axes(C, 1)
    add_edge!(G, dict2[C[i, 1]], dict2[C[i, 2]])
end

In [15]:
G

{27770, 352807} directed simple Int64 graph

In [16]:
# Edge list expressed in vertex indices: column 1 = source, column 2 = target.
# v1 and v2 stay Float64 vectors as before; their length now follows C.
v1 = Float64[dict2[id] for id in view(C, :, 1)]
v2 = Float64[dict2[id] for id in view(C, :, 2)]
v3 = Int.([v1 v2])

352807×2 Array{Int64,2}:
  4696  20164
  4696   8517
  4696   9312
  4696   9582
  4696  18260
  4696  17752
  4696   9438
  4696  25879
  4696  15836
  4696  24675
  4696  27148
  4696  16654
  4696   9445
     ⋮       
 13055  25591
 13055  16879
 13055  21880
 20601  19672
  5720  14471
  5720  23506
  5720  25030
  5720  19177
  5720  20279
  5720  25878
  5720  12397
  5720   9583

In [17]:
# Transposed adjacency matrix of G with Float64 entries — the same first step
# densityG performs before normalising the columns.
B=float.(adjacency_matrix(G)')

27770×27770 SparseMatrixCSC{Float64,Int64} with 352807 stored entries:
  [1763 ,     1]  =  1.0
  [3069 ,     1]  =  1.0
  [11042,     1]  =  1.0
  [25782,     1]  =  1.0
  [1246 ,     3]  =  1.0
  [2136 ,     3]  =  1.0
  [2477 ,     3]  =  1.0
  [4010 ,     3]  =  1.0
  [6649 ,     3]  =  1.0
  [6994 ,     3]  =  1.0
  ⋮
  [16920, 27768]  =  1.0
  [18260, 27768]  =  1.0
  [19297, 27768]  =  1.0
  [19954, 27768]  =  1.0
  [22396, 27768]  =  1.0
  [22955, 27768]  =  1.0
  [23738, 27768]  =  1.0
  [25161, 27768]  =  1.0
  [25879, 27768]  =  1.0
  [26617, 27768]  =  1.0
  [257  , 27770]  =  1.0

In [18]:
# Weakly connected components of G, each given as a vector of vertex indices.
G2=weakly_connected_components(G)

143-element Array{Array{Int64,1},1}:
 [1, 3, 4, 5, 6, 7, 8, 9, 10, 11  …  27761, 27762, 27763, 27764, 27765, 27766, 27767, 27768, 27769, 27770]
 [2, 12899]                                                                                               
 [34, 18915]                                                                                              
 [93, 6384, 8849, 16112]                                                                                  
 [111, 18973]                                                                                             
 [159, 5056, 14644]                                                                                       
 [238, 4174, 24255]                                                                                       
 [248, 5287]                                                                                              
 [264, 295, 1082, 1368, 5261, 7960, 11388, 15969, 27388, 27499]                                           


In [19]:
G2[1]

27400-element Array{Int64,1}:
     1
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
     ⋮
 27759
 27760
 27761
 27762
 27763
 27764
 27765
 27766
 27767
 27768
 27769
 27770

In [20]:
# Two-column table: vertex index in G | compact 1-based index inside component
# G2[1]. The range follows the component size instead of a hard-coded 27400.
# NOTE(review): G2[1] is assumed to be the component of interest (it has 27400
# of the 27770 vertices here) — confirm it is always the largest one.
G3 = [G2[1] 1:length(G2[1])]

27400×2 Array{Int64,2}:
     1      1
     3      2
     4      3
     5      4
     6      5
     7      6
     8      7
     9      8
    10      9
    11     10
    12     11
    13     12
    14     13
     ⋮       
 27759  27389
 27760  27390
 27761  27391
 27762  27392
 27763  27393
 27764  27394
 27765  27395
 27766  27396
 27767  27397
 27768  27398
 27769  27399
 27770  27400

In [21]:
# Set2: members of component G2[1] for fast membership tests;
# Ss: the same members in the set's iteration order.
Set2 = Set(G2[1])
Ss = collect(Set2)

27400-element Array{Int64,1}:
 15769
 22035
  6265
  1333
  3120
  9911
 11942
 19698
 10458
  7237
 25568
 25786
 23940
     ⋮
  2151
 23717
  9560
 27591
 17344
  1535
 24551
 12881
 20166
 21184
  9783
  9775

In [22]:
# dict3: compact index -> vertex index in G; dict4: the inverse map.
# The index range follows the size of G2[1] instead of a hard-coded 27400.
dict3 = Dict(zip(eachindex(G2[1]), G2[1]));
dict4 = Dict(zip(G2[1], eachindex(G2[1])));

In [23]:
G2[1]

27400-element Array{Int64,1}:
     1
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
     ⋮
 27759
 27760
 27761
 27762
 27763
 27764
 27765
 27766
 27767
 27768
 27769
 27770

In [24]:
# Empty directed graph with one vertex per member of component G2[1]
# (27400 here) instead of a hard-coded vertex count.
cG = SimpleDiGraph(length(G2[1]))

{27400, 0} directed simple Int64 graph

In [25]:
# Copy into cG every edge of C whose two endpoints both lie in Set2,
# re-indexing the endpoints through dict4. Iterates over the rows actually
# present in C and looks each endpoint up in dict2 only once.
for i in axes(C, 1)
    src = dict2[C[i, 1]]
    dst = dict2[C[i, 2]]
    if src in Set2 && dst in Set2
        add_edge!(cG, dict4[src], dict4[dst])
    end
end

In [26]:
cG

{27400, 352542} directed simple Int64 graph

In [27]:
# Score the vertices of cG with densityG. With eps = 0 the tolerance test in
# pagerank only stops early when two successive iterates are identical;
# otherwise all 200 iterations are run.
f=densityG(cG,0,200)

27400×1 SparseMatrixCSC{Float64,Int64} with 27400 stored entries:
  [1    ,     1]  =  1.45494e-5
  [2    ,     1]  =  9.74657e-6
  [3    ,     1]  =  8.68358e-6
  [4    ,     1]  =  5.47445e-6
  [5    ,     1]  =  5.84719e-6
  [6    ,     1]  =  6.63903e-6
  [7    ,     1]  =  7.03389e-6
  [8    ,     1]  =  1.03423e-5
  [9    ,     1]  =  1.23436e-5
  [10   ,     1]  =  5.47445e-6
  ⋮
  [27390,     1]  =  5.62165e-6
  [27391,     1]  =  7.93222e-6
  [27392,     1]  =  1.78188e-5
  [27393,     1]  =  6.3729e-6
  [27394,     1]  =  7.0801e-6
  [27395,     1]  =  1.15147e-5
  [27396,     1]  =  1.52271e-5
  [27397,     1]  =  7.75001e-6
  [27398,     1]  =  4.36073e-5
  [27399,     1]  =  2.17452e-5
  [27400,     1]  =  1.01277e-5

In [83]:
# Adjacency lists of cG: aG[i] holds all neighbours of vertex i as returned by
# all_neighbors. The vertex range comes from the graph itself instead of a
# hard-coded 27400.
aG = Vector{Int}[all_neighbors(cG, i) for i in 1:nv(cG)]
aG

27400-element Array{Array{Int64,1},1}:
 [1736, 3027, 10894, 25450, 261, 1241, 7921, 8511, 10743, 11836, 15766, 16167, 22612, 25859]                                                   
 [1225, 2107, 2442, 3954, 6560, 6900, 8893, 10348, 10519, 10628  …  11563, 11945, 13707, 15963, 17119, 18808, 20743, 20991, 21094, 24622]      
 [10363, 16730, 26230]                                                                                                                         
 [1538]                                                                                                                                        
 [9265, 13030, 13774, 21913, 23554, 24768, 27377]                                                                                              
 [57, 392, 1182, 2765, 3140, 4383, 5239, 5453, 5887, 6955  …  24356, 24531, 24954, 2325, 4563, 12848, 15529, 19145, 21670, 22963]              
 [2389, 3209, 4970, 5401, 5090, 18488, 20394]                                                    

In [84]:
# Cluster the vertices from the adjacency lists aG and the scores f (converted
# to a dense array). The third argument is presumably the threshold `tau` seen
# in the commented-out call inside plotGraphCluster — TODO confirm.
# S1 is a list of clusters (vectors of vertex indices); u1 is the second
# return value that plotGraphCluster matches against each cluster's first entry.
S1,u1=ToMaTo.Clustering(aG,Array(f),0.0001)

(Array{Int64,1}[[25558, 9682, 5527, 14108, 14316, 10009, 3369, 10875, 25563, 1246  …  25862, 25865, 26005, 26041, 26098, 26109, 26609, 26618, 26718, 27015], [19151, 23429, 22208, 25357, 23321, 8585, 5487, 3986, 21110, 4061  …  23166, 24190, 25611, 25682, 25771, 25936, 26007, 26072, 26761, 26921], [1925, 2887, 19116, 15975, 234, 25396, 7263, 10973, 14181, 16479  …  8235, 8657, 10460, 11079, 14221, 17146, 17875, 20476, 21178, 27069], [10713, 14057, 23048, 10283, 136, 24436, 27200, 9256, 9462, 9966  …  21966, 22837, 23542, 23549, 24908, 25322, 25648, 25931, 26453, 27288], [19521, 4363, 17157, 11169, 19175, 17633, 22365, 898, 1887, 25125  …  19343, 20211, 20570, 20630, 20862, 23874, 24193, 24394, 25494, 26191], [193, 19350, 11472, 8106, 18462, 7760, 12986, 9396, 13983, 10232  …  15346, 17370, 19758, 19836, 20256, 20932, 21179, 21870, 23130, 25134], [16726, 15434, 25767, 17080, 7865, 2893, 20921, 8036, 3023, 7420  …  18610, 19482, 20289, 21502, 23760, 24097, 24638, 26462, 26904, 27382], [25

In [85]:
S1

30-element Array{Array{Int64,1},1}:
 [25558, 9682, 5527, 14108, 14316, 10009, 3369, 10875, 25563, 1246  …  25862, 25865, 26005, 26041, 26098, 26109, 26609, 26618, 26718, 27015] 
 [19151, 23429, 22208, 25357, 23321, 8585, 5487, 3986, 21110, 4061  …  23166, 24190, 25611, 25682, 25771, 25936, 26007, 26072, 26761, 26921] 
 [1925, 2887, 19116, 15975, 234, 25396, 7263, 10973, 14181, 16479  …  8235, 8657, 10460, 11079, 14221, 17146, 17875, 20476, 21178, 27069]    
 [10713, 14057, 23048, 10283, 136, 24436, 27200, 9256, 9462, 9966  …  21966, 22837, 23542, 23549, 24908, 25322, 25648, 25931, 26453, 27288]  
 [19521, 4363, 17157, 11169, 19175, 17633, 22365, 898, 1887, 25125  …  19343, 20211, 20570, 20630, 20862, 23874, 24193, 24394, 25494, 26191] 
 [193, 19350, 11472, 8106, 18462, 7760, 12986, 9396, 13983, 10232  …  15346, 17370, 19758, 19836, 20256, 20932, 21179, 21870, 23130, 25134]  
 [16726, 15434, 25767, 17080, 7865, 2893, 20921, 8036, 3023, 7420  …  18610, 19482, 20289, 21502, 23760, 24097, 

In [64]:
# One Float64 slot per vertex of cG (filled with in-neighbour counts by a
# later cell); sized from the graph instead of a hard-coded 27400.
f2 = zeros(nv(cG))

27400-element Array{Float64,1}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮  
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [81]:
# f2[i] = number of in-neighbours of vertex i in cG; iterates over the slots
# f2 actually has instead of a hard-coded 27400.
for i in eachindex(f2)
    f2[i] = length(inneighbors(cG, i))
end

In [82]:
f2

27400-element Array{Float64,1}:
 10.0
 21.0
  3.0
  0.0
  1.0
  7.0
  3.0
  2.0
  5.0
  0.0
  1.0
  5.0
  1.0
  ⋮  
  0.0
  1.0
  1.0
  4.0
  4.0
  2.0
 13.0
 14.0
  4.0
 26.0
  7.0
  1.0

In [86]:
# Same clustering call as for S1/u1, but driven by the in-neighbour counts f2
# and with 10 as the third argument (presumably the threshold `tau` — TODO confirm).
S2,u2=ToMaTo.Clustering(aG,f2,10)

(Array{Int64,1}[[15752, 18636, 22369, 23997, 7991, 15874, 24648, 24641, 5005, 7391  …  19516, 21899, 24987, 918, 3038, 5733, 9218, 12856, 26421, 26565], [23935, 6126, 19906, 6106, 8408, 17322], [10716, 5826, 6221, 22475, 18393, 317, 14947, 18987, 5963, 25417  …  764, 3557, 17411, 12746, 15469, 17348, 18085, 19164, 23572, 25910], [22619, 25866, 4892, 14451, 27356, 1292, 12204, 16746, 120, 22627, 7473, 7964, 10422, 11729, 18944, 23105, 27251], [23429, 25357, 23321, 19151, 20631, 22208, 842, 3986, 5487, 3888  …  22656, 22877, 24190, 24939, 25611, 25682, 25771, 25936, 26605, 26761], [10063, 16803, 26606, 1425, 10726, 15354, 24908], [14345, 13679, 6152, 10122, 25171, 18355, 26296, 8347, 26175, 6718  …  23258, 2111, 5816, 6782, 7258, 21573, 25370, 3791, 10409, 26755], [26479, 15449, 23890, 15028, 10051, 8763, 12524, 12430, 1156, 4896, 26459, 27174, 7834, 14860, 16593, 20605], [19521, 19175, 17633, 25125, 22365, 4363, 10170, 18353, 21223, 5116  …  18553, 19343, 20211, 20570, 20630, 23874, 241

In [88]:
S2

58-element Array{Array{Int64,1},1}:
 [15752, 18636, 22369, 23997, 7991, 15874, 24648, 24641, 5005, 7391  …  19516, 21899, 24987, 918, 3038, 5733, 9218, 12856, 26421, 26565]      
 [23935, 6126, 19906, 6106, 8408, 17322]                                                                                                      
 [10716, 5826, 6221, 22475, 18393, 317, 14947, 18987, 5963, 25417  …  764, 3557, 17411, 12746, 15469, 17348, 18085, 19164, 23572, 25910]      
 [22619, 25866, 4892, 14451, 27356, 1292, 12204, 16746, 120, 22627, 7473, 7964, 10422, 11729, 18944, 23105, 27251]                            
 [23429, 25357, 23321, 19151, 20631, 22208, 842, 3986, 5487, 3888  …  22656, 22877, 24190, 24939, 25611, 25682, 25771, 25936, 26605, 26761]   
 [10063, 16803, 26606, 1425, 10726, 15354, 24908]                                                                                             
 [14345, 13679, 6152, 10122, 25171, 18355, 26296, 8347, 26175, 6718  …  23258, 2111, 5816, 6782, 7258, 215

In [91]:
# Total number of vertices over all clusters of S1. A reduction replaces the
# top-level loop that rebound the global `points`, which only works under the
# REPL/IJulia soft-scope rule.
points = mapreduce(length, +, S1; init=0)

In [92]:
points

27400

In [38]:
# Sparse integer adjacency matrix of cG (not transposed, unlike in densityG).
adj= adjacency_matrix(cG)

27400×27400 SparseMatrixCSC{Int64,Int64} with 352542 stored entries:
  [261  ,     1]  =  1
  [1241 ,     1]  =  1
  [7921 ,     1]  =  1
  [8511 ,     1]  =  1
  [10743,     1]  =  1
  [11836,     1]  =  1
  [15766,     1]  =  1
  [16167,     1]  =  1
  [22612,     1]  =  1
  [25859,     1]  =  1
  ⋮
  [25062, 27398]  =  1
  [25637, 27398]  =  1
  [26742, 27398]  =  1
  [3725 , 27399]  =  1
  [4324 , 27399]  =  1
  [5615 , 27399]  =  1
  [10064, 27399]  =  1
  [15014, 27399]  =  1
  [19021, 27399]  =  1
  [25251, 27399]  =  1
  [12456, 27400]  =  1

In [39]:
#using Junet

In [40]:
#draw(PNG("graphsvg.svg", 100cm, 100cm), gplot(cG))

In [41]:
#plotGraphCluster(cG,S1,u1)

In [42]:
"""
    plotGraphCluster2(G, S, u)

Colour the vertices of `G` by cluster, write the drawing to `graph2.SVG` as a
100cm x 100cm SVG and return `(S, u1, gp)`, where `u1[i]` is the colour/cluster
index assigned to entry `i` of `u` and `gp` is the plot object.

# Arguments
- `G`: graph handed to `gplot`.
- `S`: collection of clusters; the first element of each cluster is used as the
  cluster's label.
- `u`: per-vertex labels, matched against the cluster labels (presumably the
  second value returned by `ToMaTo.Clustering` — TODO confirm).

# Returns
`(S, u1, gp)`, or the sentinel `1` when `S` is empty (nothing is drawn).

# Notes
Labels are translated through a lookup table built from the original labels.
Entries whose label matches no cluster always get the extra index
`size(S, 1) + 1`, which is drawn in black. Previously a label was rewritten in
place and then compared against later cluster labels, and an unmatched label
that happened to be `<= size(S, 1)` was coloured as if it were a cluster.
`RGB`, `gplot`, `draw`, `SVG` and `cm` are not defined in this file; presumably
they come from Colors, GraphPlot and Compose (TODO confirm they are loaded).
"""
function plotGraphCluster2(G, S, u)
    nclusters = size(S, 1)
    if nclusters == 0
        return 1
    end

    # Cluster label (first member) -> cluster index; the first cluster wins on ties.
    rootindex = Dict{eltype(eltype(S)),Int}()
    for (j, cluster) in enumerate(S)
        isempty(cluster) && continue
        get!(rootindex, cluster[1], j)
    end

    u1 = copy(u)
    for i in eachindex(u1)
        u1[i] = get(rootindex, u[i], nclusters + 1)
    end

    # One random colour per cluster plus black for unmatched entries.
    colors = [RGB(rand(), rand(), rand()) for _ in 1:(nclusters + 1)]
    colors[end] = RGB(0, 0, 0)

    nodefillc = colors[u1]
    gp = gplot(G, nodefillc=nodefillc)
    draw(SVG("graph2.SVG", 100cm, 100cm), gp)
    return S, u1, gp
end

plotGraphCluster2 (generic function with 1 method)

In [43]:
#plotGraphCluster2(cG,S2,u2)

In [44]:
#S2,u21,gp2=plotGraphCluster2(cG,S2,u2)

In [45]:
#S2,u11,gp1=plotGraphCluster(cG,S1,u1)

In [46]:
#draw(PNG("graph2.PNG", 100cm, 100cm), gp2)

In [47]:
cG

{27400, 352542} directed simple Int64 graph

In [48]:
using GraphIO, LightGraphs

In [49]:
# Write cG to a file named "cG" in the current working directory using
# savegraph's default format (NOTE(review): presumably LightGraphs' native
# format; the echoed 1 is presumably the number of graphs written — confirm).
savegraph("cG",cG)

1

In [50]:
#savegraph(".\\testlist.txt", cG,"test", C())

In [51]:
cG

{27400, 352542} directed simple Int64 graph

In [52]:
EdgeListFormat()

GraphIO.EdgeListFormat()

In [53]:
#gp2

In [54]:
#draw(PNG("filename.png", 100cm, 100cm), gp2)

In [55]:
# Total number of vertices over all clusters of S1. A reduction replaces the
# top-level loop that rebound the global `points`, which only works under the
# REPL/IJulia soft-scope rule.
points = mapreduce(length, +, S1; init=0)
points

27400

In [56]:
S2

283-element Array{Array{Int64,1},1}:
 [5949, 20616, 21662, 4210, 17182, 25401, 26986, 27077, 6424, 9124  …  10455, 14995, 20947, 989, 14828, 19901, 20002, 22229, 1768, 20674]
 [22266, 9329, 26607, 17741]                                                                                                             
 [18742, 9332, 2405]                                                                                                                     
 [10786, 6869, 10662, 6897, 4828, 6948, 25383]                                                                                           
 [18886, 1803, 10997, 25456, 10924, 26435, 10076, 21272]                                                                                 
 [12213]                                                                                                                                 
 [19106]                                                                                                                               

In [93]:
# Colour cG by the S1/u1 clustering and write the drawing to the given PNG.
# S1 is rebound to plotGraphCluster's first return value, which is the S that
# was passed in; um1 holds the per-vertex colour indices and gp1 the plot.
# plotGraphCluster returns the single value 1 when S1 is empty, in which case
# this three-way destructuring fails.
# NOTE(review): the output filename is unprofessional — consider renaming it.
S1,um1,gp1=plotGraphCluster(cG,S1,u1,"fucku.png")

(Array{Int64,1}[[25558, 9682, 5527, 14108, 14316, 10009, 3369, 10875, 25563, 1246  …  25862, 25865, 26005, 26041, 26098, 26109, 26609, 26618, 26718, 27015], [19151, 23429, 22208, 25357, 23321, 8585, 5487, 3986, 21110, 4061  …  23166, 24190, 25611, 25682, 25771, 25936, 26007, 26072, 26761, 26921], [1925, 2887, 19116, 15975, 234, 25396, 7263, 10973, 14181, 16479  …  8235, 8657, 10460, 11079, 14221, 17146, 17875, 20476, 21178, 27069], [10713, 14057, 23048, 10283, 136, 24436, 27200, 9256, 9462, 9966  …  21966, 22837, 23542, 23549, 24908, 25322, 25648, 25931, 26453, 27288], [19521, 4363, 17157, 11169, 19175, 17633, 22365, 898, 1887, 25125  …  19343, 20211, 20570, 20630, 20862, 23874, 24193, 24394, 25494, 26191], [193, 19350, 11472, 8106, 18462, 7760, 12986, 9396, 13983, 10232  …  15346, 17370, 19758, 19836, 20256, 20932, 21179, 21870, 23130, 25134], [16726, 15434, 25767, 17080, 7865, 2893, 20921, 8036, 3023, 7420  …  18610, 19482, 20289, 21502, 23760, 24097, 24638, 26462, 26904, 27382], [25