In [1]:
import sys
!{sys.executable} -m pip install snap-stanford



In [1]:
import snap
import pandas as pd
import numpy as np
import operator
import statistics
import chardet

## Facebook (undirected)

In [138]:
nodes = list(range(100,250))
count = len(nodes)

In [139]:
fb = snap.LoadEdgeList(snap.TUNGraph, "facebook_combined.txt", 0, 1)
sub_fb = fb.GetSubGraph(nodes)
sub_fb.GetEdges()

400

### FB network data has been preprocessed. A subgraph of the network with 150 nodes and 400 edges will be analyzed.

In [140]:
labels_fb = {}
for NI in sub_fb.Nodes():
    labels_fb[NI.GetId()] = str(NI.GetId())
sub_fb.DrawGViz(snap.gvlNeato, "output_fb3.png", " ", labels_fb)

![alt text](output_fb3.png)

### Average distance and diameter

In [141]:
result = sub_fb.GetBfsEffDiam(count, nodes, False)
avg_dist = result[0]
diameter = result[2]

In [142]:
dist = pd.DataFrame([[avg_dist, diameter]], columns=['Avg Distance', 'Diameter'])
dist

Unnamed: 0,Avg Distance,Diameter
0,4.512939,9


### Degree centrality

In [143]:
dc = {}
for NI in sub_fb.Nodes():
    dc[NI.GetId()] = sub_fb.GetDegreeCentr(NI.GetId())

In [144]:
max_dc = max(dc.items(), key=operator.itemgetter(1))[0]
max_dc

119

In [145]:
avg_dc = statistics.mean(dc.values())
avg_dc

0.035794183445190156

In [146]:
dcs = pd.DataFrame([[max_dc, dc[max_dc], avg_dc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
dcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,119,0.174497,0.035794


### Betweenness centrality

In [147]:
bc = {}
Nodes, Edges = sub_fb.GetBetweennessCentr(1.0)

for n in Nodes:
    bc[n] = Nodes[n]

In [148]:
max_bc = max(bc.items(), key=operator.itemgetter(1))[0]
max_bc

119

In [149]:
avg_bc = statistics.mean(bc.values())
avg_bc

65.27333333333334

In [150]:
bcs = pd.DataFrame([[max_bc, bc[max_bc], avg_bc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
bcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,119,1032.136276,65.273333


### Closeness centrality

In [151]:
cc = {}
for NI in sub_fb.Nodes():
    cc[NI.GetId()] = sub_fb.GetClosenessCentr(NI.GetId())

In [152]:
max_cc = max(cc.items(), key=operator.itemgetter(1))[0]
max_cc

239

In [153]:
avg_cc = statistics.mean(cc.values())
avg_cc

0.1374593159491711

In [154]:
ccs = pd.DataFrame([[max_cc, cc[max_cc], avg_cc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
ccs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,239,0.297677,0.137459


### Clustering coefficient

In [131]:
ccf = sub_fb.GetClustCf(-1)
ccf

0.37629102136253845

### Bridges

In [134]:
n1 = []
n2 = []
b = sub_fb.GetEdgeBridges()
for e in b:
    n1.append(e.GetVal1())
    n2.append(e.GetVal2())

In [136]:
bs = pd.DataFrame(columns=['1st Node of Bridge', '2nd Node of Bridge'])
bs['1st Node of Bridge'] = n1
bs['2nd Node of Bridge'] = n2
bs

Unnamed: 0,1st Node of Bridge,2nd Node of Bridge
0,120,247
1,120,136
2,166,198
3,163,173
4,130,191
5,155,157
6,124,157
7,124,170
8,159,172
9,117,231


In [137]:
bridge_count = len(bs)
bridge_count

29

## Twitch (undirected)

In [118]:
# get first 150 unique nodes in Twitch graph
nodes = []
n = 150
counter = 0

with open('twitch8.txt', encoding="utf8") as infile:
    for line in infile:
        if counter == n: 
            break
            
        node = int(line.split()[0])
        if node not in nodes:
            nodes.append(node)
            counter = counter + 1

In [119]:
count = len(nodes)
count

150

In [120]:
tw = snap.LoadEdgeList(snap.TUNGraph, "twitch8.txt", 0, 1)
tw.GetEdges()

426800

In [121]:
sub_tw = tw.GetSubGraph(nodes)
sub_tw.GetEdges()

422

### Twitch network data has been preprocessed. A subgraph of the network with 150 nodes and 422 edges will be analyzed.

In [122]:
labels_tw = {}
for NI in sub_tw.Nodes():
    labels_tw[NI.GetId()] = str(NI.GetId())
sub_tw.DrawGViz(snap.gvlNeato, "output_tw.png", " ", labels_tw)

![alt text](output_tw.png)

### Average distance and diameter

In [123]:
result = sub_tw.GetBfsEffDiam(count, nodes, False)
avg_dist = result[0]
diameter = result[2]

In [124]:
dist = pd.DataFrame([[avg_dist, diameter]], columns=['Avg Distance', 'Diameter'])
dist

Unnamed: 0,Avg Distance,Diameter
0,3.670365,6


### Degree centrality

In [125]:
dc = {}
for NI in sub_tw.Nodes():
    dc[NI.GetId()] = sub_tw.GetDegreeCentr(NI.GetId())

In [126]:
max_dc = max(dc.items(), key=operator.itemgetter(1))[0]
max_dc

127

In [127]:
avg_dc = statistics.mean(dc.values())
avg_dc

0.03776286353467562

In [128]:
dcs = pd.DataFrame([[max_dc, dc[max_dc], avg_dc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
dcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,127,0.348993,0.037763


### Betweenness centrality

In [129]:
bc = {}
Nodes, Edges = sub_tw.GetBetweennessCentr(1.0)

for n in Nodes:
    bc[n] = Nodes[n]

In [130]:
max_bc = max(bc.items(), key=operator.itemgetter(1))[0]
max_bc

127

In [131]:
avg_bc = statistics.mean(bc.values())
avg_bc

128.34666666666666

In [132]:
bcs = pd.DataFrame([[max_bc, bc[max_bc], avg_bc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
bcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,127,3321.937563,128.346667


### Closeness centrality

In [133]:
cc = {}
for NI in sub_tw.Nodes():
    cc[NI.GetId()] = sub_tw.GetClosenessCentr(NI.GetId())

In [134]:
max_cc = max(cc.items(), key=operator.itemgetter(1))[0]
max_cc

127

In [135]:
avg_cc = statistics.mean(cc.values())
avg_cc

0.32140222401595264

In [137]:
ccs = pd.DataFrame([[max_cc, cc[max_cc], avg_cc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
ccs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,127,0.52453,0.321402


### Clustering coefficient

In [155]:
ccf = sub_tw.GetClustCf(-1)
ccf

0.1814332303155832

In [156]:
n1 = []
n2 = []
b = sub_tw.GetEdgeBridges()
for e in b:
    n1.append(e.GetVal1())
    n2.append(e.GetVal2())

In [157]:
bs = pd.DataFrame(columns=['1st Node of Bridge', '2nd Node of Bridge'])
bs['1st Node of Bridge'] = n1
bs['2nd Node of Bridge'] = n2
bs

Unnamed: 0,1st Node of Bridge,2nd Node of Bridge
0,8,67
1,18,77
2,32,63
3,93,6027
4,115,142
5,44,133
6,77,3504
7,90,97
8,97,136
9,85,138


In [158]:
bridge_count = len(bs)
bridge_count

19

## Twitter (directed)

In [60]:
# get first 150 unique nodes in Twitter graph
nodes = []
n = 150
counter = 0

with open('twitter_combined.txt', encoding="utf8") as infile:
    for line in infile:
        if counter == n: 
            break
            
        node = int(line.split()[0])
        if node not in nodes:
            nodes.append(node)
            counter = counter + 1

In [61]:
count = len(nodes)
count

150

In [7]:
tt = snap.LoadEdgeList(snap.TNGraph, "twitter_combined.txt", 0, 1)
tt.GetEdges()

1768149

In [23]:
sub_tt = tt.GetSubGraph(nodes)
sub_tt.GetEdges()

5731

### Twitter network data has been preprocessed. A subgraph of the network with 150 nodes and 5731 edges will be analyzed.

In [24]:
labels_tt = {}
for NI in sub_tt.Nodes():
    labels_tt[NI.GetId()] = str(NI.GetId())
sub_tt.DrawGViz(snap.gvlNeato, "output_tt2.png", " ", labels_tt)

![alt text](output_tt2.png)

### Average distance and diameter

In [27]:
result = sub_tt.GetBfsEffDiam(count, nodes, False)
avg_dist = result[0]
diameter = result[2]

In [28]:
dist = pd.DataFrame([[avg_dist, diameter]], columns=['Avg Distance', 'Diameter'])
dist

Unnamed: 0,Avg Distance,Diameter
0,1.902125,4


### Degree centrality

In [59]:
# cannot calculate degree centrality for a directed graph

### Betweenness centrality

In [62]:
bc = {}
Nodes, Edges = sub_tt.GetBetweennessCentr(1.0)

for n in Nodes:
    bc[n] = Nodes[n]

In [63]:
max_bc = max(bc.items(), key=operator.itemgetter(1))[0]
max_bc

40981798

In [64]:
avg_bc = statistics.mean(bc.values())
avg_bc

52.19999999999999

In [65]:
bcs = pd.DataFrame([[max_bc, bc[max_bc], avg_bc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
bcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,40981798,659.361337,52.2


### Closeness centrality

In [38]:
cc = {}
for NI in sub_tt.Nodes():
    cc[NI.GetId()] = sub_tt.GetClosenessCentr(NI.GetId())

In [39]:
max_cc = max(cc.items(), key=operator.itemgetter(1))[0]
max_cc

43003845

In [40]:
avg_cc = statistics.mean(cc.values())
avg_cc

0.6003720846757238

In [41]:
ccs = pd.DataFrame([[max_cc, cc[max_cc], avg_cc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
ccs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,43003845,0.908537,0.600372


### Clustering coefficient

In [66]:
ccf = sub_tt.GetClustCf(-1)
ccf

0.6472680381789766

### Bridges

In [68]:
# cannot find bridges for a directed graph

## Who-trusts-who network from Epinions.net (directed)

In [159]:
# get first 150 unique nodes in Epinions graph
nodes = []
n = 150
counter = 0

with open('soc-Epinions1.txt', encoding="utf8") as infile:
    for line in infile:
        if counter == n: 
            break
            
        node = int(line.split()[0])
        if node not in nodes:
            nodes.append(node)
            counter = counter + 1

In [160]:
count = len(nodes)
count

150

In [161]:
ep = snap.LoadEdgeList(snap.TNGraph, "soc-Epinions1.txt", 0, 1)
ep.GetEdges()

508837

In [162]:
sub_ep = ep.GetSubGraph(nodes)
sub_ep.GetEdges()

4163

### Epinions.net network data has been preprocessed. A subgraph of the network with 150 nodes and 4163 edges will be analyzed.

In [163]:
labels_ep = {}
for NI in sub_ep.Nodes():
    labels_ep[NI.GetId()] = str(NI.GetId())
sub_ep.DrawGViz(snap.gvlNeato, "output_ep.png", " ", labels_ep)

![alt text](output_ep.png)

### Average distance and diameter

In [165]:
result = sub_ep.GetBfsEffDiam(count, nodes, False)
avg_dist = result[0]
diameter = result[2]

In [166]:
dist = pd.DataFrame([[avg_dist, diameter]], columns=['Avg Distance', 'Diameter'])
dist

Unnamed: 0,Avg Distance,Diameter
0,1.869724,3


### Degree centrality

In [168]:
# cannot calculate degree centrality for a directed graph

### Betweenness centrality

In [169]:
bc = {}
Nodes, Edges = sub_ep.GetBetweennessCentr(1.0)

for n in Nodes:
    bc[n] = Nodes[n]

In [170]:
max_bc = max(bc.items(), key=operator.itemgetter(1))[0]
max_bc

0

In [171]:
avg_bc = statistics.mean(bc.values())
avg_bc

56.81333333333333

In [172]:
bcs = pd.DataFrame([[max_bc, bc[max_bc], avg_bc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
bcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,0,2657.439401,56.813333


### Closeness centrality

In [173]:
cc = {}
for NI in sub_ep.Nodes():
    cc[NI.GetId()] = sub_ep.GetClosenessCentr(NI.GetId())

In [174]:
max_cc = max(cc.items(), key=operator.itemgetter(1))[0]
max_cc

0

In [175]:
avg_cc = statistics.mean(cc.values())
avg_cc

0.5728522850449893

In [176]:
ccs = pd.DataFrame([[max_cc, cc[max_cc], avg_cc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
ccs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,0,0.986755,0.572852


### Clustering coefficient

In [177]:
ccf = sub_ep.GetClustCf(-1)
ccf

0.5364951966600079

### Bridges

In [178]:
# cannot find bridges for a directed graph