# Demonstration of the Configurational Model
# 19-2-18

In [1]:
# Aims: To compare the network of messages which is created by the configuration\
# model with that actually observed in Facebook in December 2009

In [2]:
#%% IMPORT LIBRARIES NEEDED
import networkx_extended as nx
import empirical.Timeslice as ts   #For accessing network data
import time  #For timing
import numpy as np   #For plotting degree dist
import matplotlib.pyplot as plt
import pickle #For loading data for 2009 timeslice

In [3]:
#%% PROPERTIES OF NETWORK: METHODS FOR CALCULATING THEM
#Largest Component Size
def computeLCS(G):
    '''Calculates order parameter on network G. In this case maximum component size'''
    return len(max(nx.connected_components(G)))

#Average Clustering Coefficient
def computeClusteringCoefficients(Graph, isWeighted=False):
    clustering_dicts = nx.algorithms.cluster.clustering(Graph)
    avgClusteringCoefficient = nx.average_clustering(Graph)
    return avgClusteringCoefficient, clustering_dicts

def plot_distribution(d,name,n=1000):
    '''Plots loglog frequency and cumulative distributions for a quantity
    PARAMETERS
    ---
    d: dictionary
        Dictionary of values to be histogrammed
    name: string
        Name of the quantity represented by the list of values
    n: int
        Number of bins
    ''' 
#    log_max_degree = np.log10(max(d.values()))
    n=len(d)
    bin_edges = np.linspace(0,max(d.values()),n-1)
    bin_edges = list(bin_edges)
    bin_edges.insert(0,0.)
    #print(bin_edges)
    
    #TODO: Loop through and create a dictionary {degree: frequency}
    
    plt.figure()
    degree_dist,bins = np.histogram(list(d.values()),bins=bin_edges)
    bin_centres = [sum(bin_edges[i:i+1]) for i in range(0,len(bin_edges)-1)]
    plt.loglog(bin_centres,degree_dist,'+')
    plt.xlim(1,1e4)
    plt.grid()
    plt.title('{} distribution'.format(name.capitalize()))
    plt.xlabel('log({})'.format(name))
    plt.ylabel('log(Frequency)')
    plt.figure()
    
    degree_dist,degrees,patches = plt.hist(d.values(),bins=bin_edges,cumulative=True)
    plt.title('Cumulative {} distribution'.format(name))
    plt.xlabel(name)
    plt.ylabel('Cumulative frequency')

In [4]:
#%% DEGREE DIST FOR JAN 2009 Timeslice
print("---DEGREE DIST FOR DEC 2008 Timeslice---")
print("LOADING")
degree_dict =dict( pickle.load(open('empirical/Results/2008_DEC_Weighted-degree_dict.pkl','rb')))
print("DONE LOADING")
print("PLOTTING")
plot_distribution(dict(degree_dict),'degree')
print("DONE PLOTTING")
print("---")


---DEGREE DIST FOR DEC 2008 Timeslice---
LOADING
DONE LOADING
PLOTTING
DONE PLOTTING
---


In [6]:
#%% MODEL: USING DEGREE DIST FOR DEC 2008
#Prints "SELF-LOOP" if a self-loop is found
#Prints "DOUBLE EDGE" if an edge already exists between two users and increases weight on the edge

import configuration_model
print("")
print("MODEL: USING DEGREE DIST FOR DEC 2008")

#PARAMETERS
no_of_iterations = 9

lcs_list = list()
ccs_list = list()

print("TOTAL EDGES IN NETWORK: {}".format(sum(degree_dict.values())/2))
for i in range(no_of_iterations):
    F = configuration_model.generateConfigNetwork(degree_dict)
    print("---MICROSTATE {}---".format(i+1))
    if len(F.nodes()) < 100:
        #DRAW NETWORK
        fig = plt.figure()
        ax = fig.add_subplot(111)
        nx.draw_networkx(F,pos=nx.circular_layout(F),ax=ax)
        ax.set_xticks(list())
        ax.set_yticks(list())
        fig.show()
    
    lcs = computeLCS(F)
    lcs_list.append(lcs)
    ccs,ccs_dict = computeClusteringCoefficients(F,True)
    ccs_list.append(ccs)
    print("RESULTS FOR MICROSTATE")
    print('Largest cluster size: {}'.format(lcs))
    print('Av. clustering coeff: {}'.format(ccs))
    print("----------------------")

lcs_av = sum(lcs_list)/len(lcs_list)
ccs_av = sum(ccs_list)/len(ccs_list)
def std(l):
    '''Returns standard deviation of a list l'''
    SS = sum([i**2 for i in l])   #Sum of squares
    N = len(l)
    MSS = SS/N #Mean Sum of Squares
    mean = sum(l)/N
    return (MSS - mean**2)**.5

lcs_std = std(lcs_list)
ccs_std = std(ccs_list)
print("---RESULTS FOR CONFIGURATION MODEL ({} ITERATIONS)---".format(no_of_iterations))
print('PEOPLE: {}'.format(len(F.nodes())))
print('INTERACTIONS: {}'.format(len(F.edges())))
print('Largest cluster size: mean {:.0f}, std: {:.2g}'.format(lcs_av,lcs_std))
print('Av. clustering coeff: mean: {:.3g}, std: {:.2g}'.format(ccs_av,ccs_std))
print("-------------")


MODEL: USING DEGREE DIST FOR DEC 2008
TOTAL EDGES IN NETWORK: 31198.0
---MICROSTATE 1---
RESULTS FOR MICROSTATE
Largest cluster size: 20836
Av. clustering coeff: 0.0002835217391234221
----------------------
---MICROSTATE 2---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0008409066894530099
----------------------
---MICROSTATE 3---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0013832914035443297
----------------------
---MICROSTATE 4---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0018981280541816082
----------------------
---MICROSTATE 5---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0023882747866938252
----------------------
---MICROSTATE 6---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0029265330863976423
----------------------
---MICROSTATE 7---
RESULTS FOR MICROSTATE
Largest cluster size: 23396
Av. clustering coeff: 0.0035113909553

In [7]:
#%% EMPIRICAL RESULTS FOR DEC 2008
print("")
print("EMPIRICAL RESULTS FOR DEC 2008 TIMESLICE")
loadedGraph = ts.loadGEXF("C:/Users/admin/Documents/Physics/year 3/WWWPhysics-PC/empirical/Data/WeightedNetwork/2008_DEC_WeightedGraph.gexf")
print('PEOPLE: {}'.format(len(loadedGraph.nodes())))
print('INTERACTIONS: {}'.format(len(loadedGraph.edges())))
lcs = computeLCS(loadedGraph)
av_clustering, clustering_dict = computeClusteringCoefficients(loadedGraph,True) 
print("---RESULTS---")
print('Largest cluster size: mean {:.0f}'.format(lcs))
print('Av. clustering coeff: mean: {:.3g}'.format(av_clustering))
print("-------------")


EMPIRICAL RESULTS FOR DEC 2008 TIMESLICE
PEOPLE: 23396
INTERACTIONS: 30964
---RESULTS---
Largest cluster size: mean 18885
Av. clustering coeff: mean: 0.0417
-------------


In [8]:
#Self edges
SS = sum([i*(i-1) for i in degree_dict.values()])
expectedSE = SS/(sum(degree_dict.values()))
print("Expected number of self-edges: {:.3g}".format(expectedSE))

Expected number of self-edges: 4.77


Largest cluster size was found to be similar to that observed in the actual interaction network giving a mean value for LCS using the degree distribution and nodes for the weighted timeslice of the network for December 2008. 

Clustering coefficients were found to be significantly smaller for the random network than the observed network. This is to be expected as the probability of someone with whom you have interacted posting or receiving a wall post on the wall of someone else whom you have interacted with is clearly far higher than random. This shows that there are clear correlations between the behaviour of users, and motivates us to use a network which uses additional information to model the network.