# **An In-depth Guide to Social Network Analysis**

Copyright @ 2020 **ABCOM Information Systems Pvt. Ltd.** All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and limitations under the License.

In [None]:
import random
from tqdm import tqdm
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from zipfile import ZipFile

# Loading data

In [None]:
!wget https://github.com/abcom-mltutorials/Facebook-Social-Network-Analysis/archive/master.zip -P "/content"
ZipFile("/content/master.zip").extractall("/content/")

# Creating dataframe from 'facebook_combined.txt'

In [None]:
# This file contains information of all the existing edges: every line is
# read as two whitespace-separated values, named 'Source' and 'Destination'.
# sep=r'\s+' is the documented replacement for the deprecated
# delim_whitespace=True and splits on the same runs of whitespace.
fb = pd.read_csv('/content/Facebook-Social-Network-Analysis-master/facebook_combined.txt', sep=r'\s+', names=['Source', 'Destination'])
fb

# Features extraction

The feature names are stored in the files with extension '.featnames'. For example '0.featnames', '107.featnames' and so on.

In [None]:
# Load one feature-name file, '0.featnames', in two forms: the raw lines
# (consumed by the next cell) and a single-column dataframe for display.
#
# format of the values in the file :
# 'feature number' + ' ' + 'feature name' + ';' + 'feature type'
# feature type always starts with 'anonymized feature'
with open('/content/Facebook-Social-Network-Analysis-master/facebook/0.featnames', 'r') as featnames_file:
    feature_names = list(featnames_file)

feature_names_df = pd.read_csv(
    '/content/Facebook-Social-Network-Analysis-master/facebook/0.featnames',
    names=['Feature Names'],
)
feature_names_df

In [None]:
# Reduce every raw line read from '0.featnames' to its bare feature name by
# dropping the leading feature number and the trailing ';<feature type>' part,
# then display the distinct names.
# NOTE: feature_names is overwritten in place with the shortened strings.
unique_features = []

for position, raw_line in enumerate(feature_names):
    without_number = raw_line.split(' ', 1)[1]
    feature_names[position] = without_number.rsplit(';', 1)[0]
    unique_features.append(feature_names[position])

list(set(unique_features))

## Function for extracting feature names

In [None]:
def ExtractFeatureNames(path):
    """Parse a '.featnames' file into ``[feature name, anonymized id]`` pairs.

    Each non-blank line is expected to look like
    ``'<feature number> <feature name>;<feature type>'``, for example
    ``'0 birthday;anonymized feature 376'``.  The leading feature number is
    discarded, the text before the last ``';'`` becomes the feature name and
    the last space-separated token of the feature type becomes the id.

    Args:
        path: Location of the '.featnames' file to read.

    Returns:
        A list with one two-element list ``[name, id]`` per non-blank line,
        in file order.  Both elements are strings.

    Raises:
        IndexError: If a non-blank line has no space after the feature
            number, no ``';'``, or no space inside the feature type.
    """
    feature_names = []
    with open(path, 'r') as f:
        for raw_line in f:
            line = raw_line.rstrip('\n')
            # Blank lines (e.g. a trailing empty line) carry no feature and
            # would otherwise fail on the split below.
            if not line.strip():
                continue
            descriptor = line.split(' ', 1)[1]
            name_and_type = descriptor.rsplit(';', 1)
            # keep only the trailing id of e.g. 'anonymized feature 376'
            name_and_type[1] = name_and_type[1].rsplit(' ', 1)[1]
            feature_names.append(name_and_type)

    return feature_names

## Creating a consolidated list of features

In [None]:
#storing all the numbers of different files in a list
l = ['0','107','348','414','686','698','1684','1912','3437','3980']

node_features = []

# Every file pair '<number>.feat' / '<number>.featnames' lives in this folder.
facebook_dir = '/content/Facebook-Social-Network-Analysis-master/facebook/'

# extracting features - 'location;id' and 'education;school;id'
for file_number in l:
    # The (feature name, anonymized id) pairs become two-level column labels.
    column_labels = pd.MultiIndex.from_tuples(
        ExtractFeatureNames(facebook_dir + file_number + '.featnames'))
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True and splits on the same runs of whitespace.
    # NOTE(review): presumably each '.feat' row starts with a node id followed
    # by one flag per feature, so the extra leading column becomes the index
    # and, after .T, the columns are node ids - verify against the data files.
    df = pd.read_csv(facebook_dir + file_number + '.feat',
                     sep=r'\s+', names=column_labels).T
    # keep only the rows of the two feature groups of interest
    node_features.append(df.loc[['location;id','education;school;id']])

## Creating a nested dictionary of all the nodes along with its features (attributes)

In [None]:
# Collect, for every node (column) of every feature table, the labels of its
# first two rows whose value is 1. A node is kept only when it has more than
# one such row and the first of them belongs to the 'location;id' group.
node_attributes = {}
for features_df in node_features:
    for node in features_df.columns:
        active_labels = features_df[features_df[node] == 1][node].index.tolist()
        if len(active_labels) > 1 and active_labels[0][0] == 'location;id':
            node_attributes[node] = active_labels[:2]

#creating the nested dictionary
# each stored label is a (feature name, value) pair, so every node ends up
# mapped to {feature name: value}
for node, labels in node_attributes.items():
    node_attributes[node] = {label[0]: label[1] for label in labels}

#sorting the dictionary
node_attributes = dict(sorted(node_attributes.items()))

In [None]:
# preview the first 10 (node, attributes) pairs of the sorted dictionary
list(node_attributes.items())[:10]

## Dropping unwanted nodes from fb dataframe

In [None]:
#dropping the extra nodes from 'fb' that are not in 'node_attributes'
nodes_with_attributes = set(node_attributes.keys())
no_att_source = list(set(fb.Source) - nodes_with_attributes)
no_att_dest = list(set(fb.Destination) - nodes_with_attributes)

# An edge is removed when its source or its destination has no attributes.
# One vectorised membership test per column replaces a full-column scan for
# every missing node; the rows removed are the same, and 'fb' is still
# modified in place with its original index labels.
missing_endpoint = fb.Source.isin(no_att_source) | fb.Destination.isin(no_att_dest)
fb.drop(fb.index[missing_endpoint], axis=0, inplace=True)

## Creating a list of all existing edges

In [None]:
# Build the edge list as (source, destination) tuples by pairing the first
# two columns of the 'fb' dataframe row by row.
existing_edges = list(zip(fb.iloc[:, 0].to_numpy(), fb.iloc[:, 1].to_numpy()))

# showing the first 5 existing edges/node pairs
existing_edges[:5]

Ok, so the processing part is complete. Now let's start creating the graph.

# Creating graph

In [None]:
# start from an empty graph; nodes, edges and attributes are added in the following cells
G = nx.Graph()

## Adding nodes

In [None]:
# register every node that has attributes; iterating the dictionary yields its keys (the node ids)
G.add_nodes_from(node_attributes)

## Adding edges

In [None]:
# add one edge for every node pair collected in 'existing_edges'
G.add_edges_from(existing_edges)

## Adding attributes/features

In [None]:
# attach the features to the nodes; 'node_attributes' maps node -> {feature name: value}
nx.set_node_attributes(G, node_attributes)

## Printing graph info

In [None]:
# nx.info() was removed in NetworkX 3.0, so the headline figures are read
# straight from the graph, which works on old and new releases alike.
print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())

## Dropping nodes with low degree

In [None]:
# This removes every node whose degree is below 3, i.e. isolated nodes as well
# as nodes with only one or two connections. The list is built before anything
# is removed, so degrees are measured on the graph as it stands when the cell
# starts.
deg = [i for i in G if G.degree(i) < 3]
G.remove_nodes_from(deg)
# nx.info() was removed in NetworkX 3.0, so the headline figures are read
# straight from the graph, which works on old and new releases alike.
print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())

## Creating a new dataframe

In [None]:
# rebuild 'fb' from the edges currently in the graph, one row per edge
fb = pd.DataFrame(list(G.edges), columns=['Node 1', 'Node 2'])
fb.head()

# Visualizations

## Circular layout

In [None]:
# The circular layout places all nodes on a circle, which helps us see how
# the different nodes are connected to each other.

plt.figure(figsize=(10,10))
# compute node positions with the circular layout
pos = nx.circular_layout(G)
# draw without node labels and with grey edges ('.4' is a grey level)
nx.draw_networkx(G, pos, with_labels=False, edge_color='.4')

# hide the axes; the trailing semicolon suppresses the notebook's text output
plt.axis('off')
plt.tight_layout();

## Random layout

In [None]:
# draw the graph on a large canvas with node positions from the random layout
plt.figure(figsize=(50,50))
pos = nx.random_layout(G)
# no node labels
nx.draw_networkx(G, pos, with_labels=False)
# hide the axes; the trailing semicolon suppresses the notebook's text output
plt.axis('off')
plt.tight_layout();

## Spring layout

In [None]:
plt.figure(figsize=(10,10))
# no positions are passed, so draw_networkx falls back to its default layout
# (the spring layout, per the NetworkX documentation); nodes and edges are
# drawn semi-transparent and without labels
nx.draw_networkx(G, alpha=0.7, with_labels=False)
# hide the axes; the trailing semicolon suppresses the notebook's text output
plt.axis('off')
plt.tight_layout();

## Custom layout

In [None]:
# NOTE(review): the positional argument of G.nodes(...) is `data`, so this asks
# for the node attribute named 2 and yields (node, None) pairs - it does not
# select node 2 or limit the output to two nodes. Presumably
# G.nodes(data=True) (all nodes with their attributes) or G.nodes[2] was
# intended - confirm.
G.nodes(2)

In [None]:
plt.figure(figsize=(30,30))

# node size is based on the degree of the node
node_size = [80*G.degree(v) for v in G]

# node color is based on the node attribute/feature value
# (the attribute dictionary is fetched once instead of once per node)
location_ids = nx.get_node_attributes(G, 'location;id')
node_color = [int(location_ids[v]) for v in G]

# One colour scale shared by the nodes and the colorbar: it spans the actual
# range of the location ids, which is the range the nodes are coloured over.
cmap = plt.cm.Blues
vmin = min(node_color)
vmax = max(node_color)

# drawing graph with custom node size and color
# using spring layout
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos, node_size=node_size,
                 node_color=node_color, alpha=0.7,
                 with_labels=False,
                 edge_color='.4', cmap=cmap,
                 vmin=vmin, vmax=vmax)

# getting the values according to attributes
# for the colorbar
sm = plt.cm.ScalarMappable(cmap=cmap,
                           norm=plt.Normalize(
                               vmin=vmin, vmax=vmax))
# public equivalent of assigning the private attribute sm._A
sm.set_array([])

plt.title('Size of a node depends on degree of the node \n Color of a node depends on the location attribute of a  the node', fontsize=40)
plt.axis('off')

# The mappable is not attached to any axes, so name the axes explicitly;
# recent matplotlib releases refuse to guess where the colorbar goes.
ax = plt.gca()
plt.colorbar(sm, ax=ax)

#plotting annotations for different nodes
# NOTE(review): the node ids below and the degrees quoted in the suptitle are
# hard-coded - confirm they still match the graph after pruning.
for label, node in (("Node 1", 17), ("Node 2", 3280), ("Node 3", 1078)):
    if node not in pos:
        # annotate() cannot place an arrow without coordinates
        print('Skipping annotation', label, '- node', node, 'is not in the graph')
        continue
    ax.annotate(label, xy=pos[node], xytext=(0, 60),
                textcoords='offset points',
                arrowprops=dict(facecolor='black'))

plt.suptitle('\n\nDegree of Node 1 (4) < Degree of Node 2 (31) < Degree of Node 3 (97)', fontsize=30)

plt.tight_layout();

In [None]:
import pickle

#saving the graph
# nx.write_gpickle() was removed in NetworkX 3.0. It pickled the graph with
# the highest protocol, so doing the same with the standard library writes an
# equivalent file. Read it back with pickle.load, and only from trusted
# sources, since unpickling can execute arbitrary code.
with open('/content/Facebook-Social-Network-Analysis-master/Graph.pickle', 'wb') as graph_file:
    pickle.dump(G, graph_file, pickle.HIGHEST_PROTOCOL)

#saving the dataframe
fb.to_csv('/content/Facebook-Social-Network-Analysis-master/fb.csv', index=False)