In [1]:
import sys
!{sys.executable} -m pip install snap-stanford



In [109]:
import snap
import pandas as pd
import numpy as np
import operator
import statistics

## Facebook

In [85]:
nodes = list(range(100,250))
count = len(nodes)

In [83]:
fb = snap.LoadEdgeList(snap.TUNGraph, "facebook_combined.txt", 0, 1)
sub_fb = fb.GetSubGraph(nodes)
sub_fb.GetEdges()

400

### FB network data has been preprocessed. A subgraph of the network with 150 nodes and 400 edges will be analyzed.

In [84]:
labels_fb = {}
for NI in sub_fb.Nodes():
    labels_fb[NI.GetId()] = str(NI.GetId())
sub_fb.DrawGViz(snap.gvlNeato, "output_fb3.png", " ", labels_fb)

![alt text](output_fb3.png)

### Average distance and diameter

In [89]:
result = sub_fb.GetBfsEffDiam(count, nodes, False)
avg_dist = result[0]
diameter = result[2]

In [100]:
dist = pd.DataFrame([[avg_dist, diameter]], columns=['Avg Distance', 'Diameter'])
dist

Unnamed: 0,Avg Distance,Diameter
0,4.512939,9


### Degree centrality

In [119]:
dc = {}
for NI in sub_fb.Nodes():
    dc[NI.GetId()] = sub_fb.GetDegreeCentr(NI.GetId())

In [120]:
max_dc = max(dc.items(), key=operator.itemgetter(1))[0]
max_dc

119

In [121]:
avg_dc = statistics.mean(dc.values())
avg_dc

0.035794183445190156

In [125]:
dcs = pd.DataFrame([[max_dc, dc[max_dc], avg_dc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
dcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,119,0.174497,0.035794


### Betweenness centrality

In [116]:
bc = {}
Nodes, Edges = sub_fb.GetBetweennessCentr(1.0)

for n in Nodes:
    bc[n] = Nodes[n]

In [117]:
max_bc = max(bc.items(), key=operator.itemgetter(1))[0]
max_bc

119

In [118]:
avg_bc = statistics.mean(bc.values())
avg_bc

65.27333333333334

In [126]:
bcs = pd.DataFrame([[max_bc, bc[max_bc], avg_bc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
bcs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,119,1032.136276,65.273333


### Closeness centrality

In [127]:
cc = {}
for NI in sub_fb.Nodes():
    cc[NI.GetId()] = sub_fb.GetClosenessCentr(NI.GetId())

In [128]:
max_cc = max(cc.items(), key=operator.itemgetter(1))[0]
max_cc

239

In [129]:
avg_cc = statistics.mean(cc.values())
avg_cc

0.1374593159491711

In [130]:
ccs = pd.DataFrame([[max_cc, bc[max_cc], avg_cc]], 
                     columns=['Node w/Max Centrality', 'Max Centrality Value', 'Avg Centrality Value'])
ccs

Unnamed: 0,Node w/Max Centrality,Max Centrality Value,Avg Centrality Value
0,239,444.355048,0.137459


### Clustering coefficient

In [131]:
ccf = sub_fb.GetClustCf(-1)
ccf

0.37629102136253845

### Bridges

In [134]:
n1 = []
n2 = []
b = sub_fb.GetEdgeBridges()
for e in b:
    n1.append(e.GetVal1())
    n2.append(e.GetVal2())

In [136]:
bs = pd.DataFrame(columns=['1st Node of Bridge', '2nd Node of Bridge'])
bs['1st Node of Bridge'] = n1
bs['2nd Node of Bridge'] = n2
bs

Unnamed: 0,1st Node of Bridge,2nd Node of Bridge
0,120,247
1,120,136
2,166,198
3,163,173
4,130,191
5,155,157
6,124,157
7,124,170
8,159,172
9,117,231


In [137]:
bridge_count = len(bs)
bridge_count

29

## Twitter

In [145]:
f=open('twitter_trunc.txt',"rb")
lines=f.readlines()
nodes=[]
for x in lines:
    nodes.append(x.split(' ')[0])
f.close()
nodes

TypeError: a bytes-like object is required, not 'str'

In [138]:
count = len(nodes)

In [140]:
tt = snap.LoadEdgeList(snap.TNGraph, "twitter_combined.txt", 0, 1)

In [141]:
sub_tt = tt.GetSubGraph(nodes)
sub_tt.GetEdges()

0

In [None]:
labels_tt = {}
for NI in sub_tt.Nodes():
    labels_tt[NI.GetId()] = str(NI.GetId())
sub_tt.DrawGViz(snap.gvlNeato, "output_tt.png", " ", labels_tt)