# Structural properties of semantic networks

In [98]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import networkx as nx
import pandas as pd

### K-clique percolation thresholds for random graphs 

For each $k$ there is a threshold probability $p_c(k)$ above which the $k$-cliques of a random Erdős–Rényi graph organize into a giant community:
$$
p_c(k)=\frac{1}{\left[ N(k-1)\right]^{\frac{1}{k-1}}},
$$
where $N$ is the number of nodes.

The following function returns the critical probabilities for $k=2$ (link percolation) and $k=3$ for a random graph with the same number of nodes as a given graph $G$:

In [99]:
def critical_probs(G):
    """Return the k-clique percolation thresholds for k=2 and k=3.

    The thresholds are those of a random graph with as many nodes as ``G``,
    using p_c(k) = 1 / [N (k - 1)] ** (1 / (k - 1)), where N is the node count.

    Parameters
    ----------
    G : graph-like
        Any object exposing ``number_of_nodes()``.

    Returns
    -------
    list
        ``[p_c(2), p_c(3)]``, i.e. ``[1 / N, 1 / sqrt(2 N)]``.
    """
    n_nodes = G.number_of_nodes()
    link_threshold = 1 / n_nodes  # k = 2: ordinary link percolation
    triangle_threshold = 1 / (np.sqrt(2 * n_nodes))  # k = 3
    return [link_threshold, triangle_threshold]

## English free association networks

In [114]:
# Free-association networks, each read from GraphML and converted to an
# undirected graph. Forward slashes keep the relative paths valid on both
# Windows and POSIX systems (backslashes only resolve on Windows).
Gw=nx.read_graphml('data/graphs/data-swow.graphml').to_undirected() # English "Small World of Words" project (SWOW-EN)

Gf=nx.read_graphml('data/graphs/data-fl.graphml').to_undirected() # the Florida data set

Ge=nx.read_graphml('data/graphs/data-ed.graphml').to_undirected() # the Edinburgh Associative Thesaurus

# Order must match the 'Network' labels of the summary table.
graphs=[Gw,Gf,Ge]

In [115]:
# Summary table for the free-association networks. 'Network' is declared as
# the first column so the layout matches the df2/df3 tables; leaving it out of
# the column list would append it after 'p_c(3)' instead.
df1 = pd.DataFrame(columns=['Network', 'Nodes', 'Edges', 'Density','Transitivity','Clustering','p_c(2)','p_c(3)'])
df1['Network']=['SWOW-EN free association','Florida free association','Edinburgh free association']

In [116]:
# Size and clustering statistics, one value per network in `graphs`.
df1['Nodes']=[nx.number_of_nodes(graph) for graph in graphs]
df1['Edges']=[nx.number_of_edges(graph) for graph in graphs]
df1['Density']=[nx.density(graph) for graph in graphs]
df1['Transitivity']=[nx.transitivity(graph) for graph in graphs]
df1['Clustering']=[nx.average_clustering(graph) for graph in graphs]

# Critical percolation probabilities: call critical_probs once per graph and
# split the resulting (n_graphs, 2) array into the k=2 and k=3 columns.
pc_values=np.array([critical_probs(graph) for graph in graphs])
df1['p_c(2)']=pc_values[:,0]
df1['p_c(3)']=pc_values[:,1]

In [117]:
# Render the summary table of the free-association networks.
display(df1)

Unnamed: 0,Network,Nodes,Edges,Density,Transitivity,Clustering,p_c(2),p_c(3)
0,SWOW-EN free association,12217,352403,0.004723,0.051769,0.112828,8.2e-05,0.006397
1,Florida free association,5019,55246,0.004387,0.083425,0.186154,0.000199,0.009981
2,Edinburgh free association,8210,241461,0.007165,0.04814,0.102825,0.000122,0.007804


## English semantic networks: phonological, synonyms, taxonomic and multiplex

In [125]:
# Single-layer English semantic networks, used as read from the GraphML files
# (no undirected conversion is applied here). Forward slashes keep the
# relative paths valid on both Windows and POSIX systems.
Ph=nx.read_graphml('data/graphs/data-phon.graphml') # phonological
Sn=nx.read_graphml('data/graphs/data-syn.graphml') # synonyms
Tx=nx.read_graphml('data/graphs/data-taxon.graphml') # taxonomic

# Multiplex network: an undirected graph holding the union of the edges of the
# three layers. Only edges are copied, so a node without any edge in a layer
# is not carried over from that layer.
Mg=nx.Graph()
for layer in (Ph,Sn,Tx):
    Mg.add_edges_from(layer.edges())
# Optional export of the multiplex graph (disabled):
#nx.write_graphml(Mg,'data/graphs/data-multi.graphml')
graphs=[Ph,Sn,Tx,Mg]

In [119]:
# Critical percolation probabilities, evaluated once per graph; column 0 holds
# p_c(2) and column 1 holds p_c(3).
pc_values=np.array([critical_probs(graph) for graph in graphs])

# Summary table for the semantic layers and their multiplex, built in a single
# constructor call; the label order must match the order of `graphs`.
df2 = pd.DataFrame({
    'Network': ['Phonological','Synonyms','Taxonomic','Multiplex'],
    'Nodes': [nx.number_of_nodes(graph) for graph in graphs],
    'Edges': [nx.number_of_edges(graph) for graph in graphs],
    'Density': [nx.density(graph) for graph in graphs],
    'Transitivity': [nx.transitivity(graph) for graph in graphs],
    'Clustering': [nx.average_clustering(graph) for graph in graphs],
    'p_c(2)': pc_values[:,0],
    'p_c(3)': pc_values[:,1],
})

In [120]:
# Render the summary table of the phonological, synonym, taxonomic and multiplex networks.
display(df2)

Unnamed: 0,Network,Nodes,Edges,Density,Transitivity,Clustering,p_c(2),p_c(3)
0,Phonological,4618,15447,0.001449,0.344888,0.245539,0.000217,0.010405
1,Synonyms,6526,13134,0.000617,0.283978,0.34388,0.000153,0.008753
2,Taxonomic,7943,42042,0.001333,0.048441,0.093201,0.000126,0.007934
3,Multiplex,8383,68505,0.00195,0.112498,0.283138,0.000119,0.007723


## Other languages

In [129]:
# Networks for other languages. Forward slashes keep the relative paths valid
# on both Windows and POSIX systems (backslashes only resolve on Windows).
Ru=nx.read_graphml('data/graphs/data-rus.graphml').to_undirected() # Russian, converted to undirected

Du=nx.read_graphml('data/graphs/data-dutch.graphml') # Dutch, used as stored in the file
graphs=[Ru,Du]

In [130]:
# Critical percolation probabilities, evaluated once per graph; column 0 holds
# p_c(2) and column 1 holds p_c(3).
pc_values=np.array([critical_probs(graph) for graph in graphs])

# Summary table for the non-English networks, built in a single constructor
# call; the label order must match the order of `graphs`.
df3 = pd.DataFrame({
    'Network': ['Rus','Dutch'],
    'Nodes': [nx.number_of_nodes(graph) for graph in graphs],
    'Edges': [nx.number_of_edges(graph) for graph in graphs],
    'Density': [nx.density(graph) for graph in graphs],
    'Transitivity': [nx.transitivity(graph) for graph in graphs],
    'Clustering': [nx.average_clustering(graph) for graph in graphs],
    'p_c(2)': pc_values[:,0],
    'p_c(3)': pc_values[:,1],
})

In [131]:
# Render the summary table of the Russian and Dutch networks.
display(df3)

Unnamed: 0,Network,Nodes,Edges,Density,Transitivity,Clustering,p_c(2),p_c(3)
0,Rus,5377,51191,0.003542,0.066898,0.163248,0.000186,0.009643
1,Dutch,10486,207810,0.00378,0.057785,0.138735,9.5e-05,0.006905
