In [1]:
import networkx as nx
import os, sys, time
import pandas as pd
import numpy as np

In [6]:
def NetStats(g, ISO):
    
    results = {}
    G = nx.read_gpickle(g)
    results['number_of_edges'] = G.number_of_edges()
    results['number_of_nodes'] = G.number_of_nodes()
    
    Gs = list(nx.strongly_connected_component_subgraphs(G))
    
    # identify graphs worthy of analysis (set thresh appropriately!)
    iterator = 0
    counts, edges, nodes = [],[],[]
    for g in Gs:
        counts.append(iterator)
        edges.append(g.number_of_edges())
        nodes.append(g.number_of_nodes())
        iterator+=1
    df = pd.DataFrame({'id':counts,'edges':edges,'nodes':nodes})
    df = df.sort_values(by = 'edges', ascending = False)
    thresh = df.edges.iloc[0] * 0.5
    df = df.loc[df.edges >= thresh]
    print(df)
    id_list = list(df.id)
    
    for i in range(0, len(id_list)):
        
        # Section 1
        start = time.time()
        i = 0
        curr_G = Gs[id_list[i]]
        undirected_G = nx.Graph(curr_G)

        circuits = nx.cycle_basis(undirected_G)
        cyclomatic_number = len(circuits)
        results['G%s_cyclomatic_number' % i] = cyclomatic_number

        e = undirected_G.number_of_edges()
        v = undirected_G.number_of_nodes()
        print('\tTime elapsed for Section 1: %s seconds' % (time.time() - start))
        
        # Section 2
        start = time.time()
        results['G%s_number_of_edges'% i] = e
        results['G%s_number_of_nodes'% i] = v
        results['G%s_alpha'% i] = cyclomatic_number / ((2 * v) - 5)
        results['G%s_beta'% i] = e / v
        results['G%s_gamma'% i] = e / (3 * (v - 2))
        print('\tTime elapsed for Section 2: %s seconds' % (time.time() - start))
    
        # Section 3
        start = time.time()
        %time ecc = nx.eccentricity(undirected_G)
        %time results['G%s_diameter' %i] = nx.diameter(undirected_G, ecc)
        %time results['G%s_radius' %i] = nx.radius(undirected_G, ecc)
        %time results['G%s_average_clustering' %i] = nx.average_clustering(undirected_G)
        %time results['G%s_degree_assortativity_coefficient' %i] = nx.degree_assortativity_coefficient(undirected_G)
        #%time results['G%s_global_efficiency' %i] = nx.global_efficiency(undirected_G)
        #%time results['G%s_av_node_connectivity' %i] = nx.average_node_connectivity(undirected_G)
        print('\tTime elapsed for Section 3: %s seconds' % (time.time() - start))
        
        # Section 4
        start = time.time()
        Z = list(nx.degree_centrality(undirected_G).values())
        results['G%s_av_degree_centrality' % i] = np.mean(Z)
        results['G%s_0.25_degree_centrality' % i] = np.percentile(Z, 25)
        results['G%s_0.50_degree_centrality' % i] = np.percentile(Z, 50)
        results['G%s_0.75_degree_centrality' % i] = np.percentile(Z, 75)
        print('\tTime elapsed for Section 4: %s seconds' % (time.time() - start))
        
        # Section 5
        start = time.time()
        Z = list(nx.closeness_centrality(undirected_G).values())
        results['G%s_av_closeness_centrality' % i] = np.mean(Z)
        results['G%s_0.25_closeness_centrality' % i] = np.percentile(Z, 25)
        results['G%s_0.50_closeness_centrality' % i] = np.percentile(Z, 50)
        results['G%s_0.75_closeness_centrality' % i] = np.percentile(Z, 75)
        print('\tTime elapsed for Section 5: %s seconds' % (time.time() - start))
        
        # Section 6
        start = time.time()
        Z = list(nx.betweenness_centrality(undirected_G).values())
        results['G%s_av_betweenness_centrality' % i] = np.mean(Z)
        results['G%s_0.25_betweenness_centrality' % i] = np.percentile(Z, 25)
        results['G%s_0.50_betweenness_centrality' % i] = np.percentile(Z, 50)
        results['G%s_0.75_betweenness_centrality' % i] = np.percentile(Z, 75)
        print('\tTime elapsed for Section 6: %s seconds' % (time.time() - start))
        
        # Section 7
        start = time.time()
        try:
            Z = list(nx.eigenvector_centrality(undirected_G).values())
            results['G%s_av_eigenvector_centrality' % i] = np.mean(Z)
            results['G%s_0.25_eigenvector_centrality' % i] = np.percentile(Z, 25)
            results['G%s_0.50_eigenvector_centrality' % i] = np.percentile(Z, 50)
            results['G%s_0.75_eigenvector_centrality' % i] = np.percentile(Z, 75)
        except:
            pass
        print('\tTime elapsed for Section 7: %s seconds' % (time.time() - start))
        
        """
        # Section 8
        start = time.time()
        try:
            Z = list(nx.communicability_betweenness_centrality(undirected_G).values())
            results['G%s_av_communicability_betweenness_centrality' % i] = np.mean(Z)
            results['G%s_0.25_av_communicability_betweenness_centrality' % i] = np.percentile(Z, 25)
            results['G%s_0.50_av_communicability_betweenness_centrality' % i] = np.percentile(Z, 50)
            results['G%s_0.75_av_communicability_betweenness_centrality' % i] = np.percentile(Z, 75)
        except:
            pass
        print('\tTime elapsed for Section 8: %s seconds' % (time.time() - start))
        """
        
    df = pd.DataFrame(results, index = ['value'])
    df = df.transpose().reset_index().rename(columns = {'index':'var_name'})
    df['country'] = ISO
    
    return df

In [7]:
root = r'D:\Criticality II\country_networks'
Q = []
for q, t, folder in os.walk(root):
    if q[-6:] == 'output':
        Q.append(q)
for q in Q:
    ISO = q[-10:-7]
    if ISO not in ['ABW','AFG']:
        
        print('...processing %s' % ISO)
        g = os.path.join(q, '{}_processed.pickle'.format(ISO))
        D = NetStats(g, ISO)
        path = r'C:\Users\charl\Documents\CE\Criticality\Netstats'
        D.to_csv(os.path.join(path, '%s_processed_netstats.csv' % ISO))

...processing AGO
   id  edges  nodes
0   0   7227   2674
	Time elapsed for Section 1: 0.07486414909362793 seconds
	Time elapsed for Section 2: 0.0 seconds
Wall time: 14.5 s
Wall time: 0 ns
Wall time: 0 ns
Wall time: 28 ms
Wall time: 25 ms
	Time elapsed for Section 3: 14.518999338150024 seconds
	Time elapsed for Section 4: 0.0010001659393310547 seconds
	Time elapsed for Section 5: 14.598000288009644 seconds
	Time elapsed for Section 6: 19.646999835968018 seconds
	Time elapsed for Section 7: 1.1510004997253418 seconds
...processing AIA
   id  edges  nodes
0   0     94     33
	Time elapsed for Section 1: 0.0 seconds
	Time elapsed for Section 2: 0.0 seconds
Wall time: 2 ms
Wall time: 0 ns
Wall time: 0 ns
Wall time: 0 ns
Wall time: 991 µs
	Time elapsed for Section 3: 0.0050008296966552734 seconds
	Time elapsed for Section 4: 0.0 seconds
	Time elapsed for Section 5: 0.0029993057250976562 seconds
	Time elapsed for Section 6: 0.0029702186584472656 seconds
	Time elapsed for Section 7: 0.014024

KeyboardInterrupt: 