In [1]:
import networkx as nx
import numpy as np
import utils as utl
import json
import matplotlib.pyplot as plt
import logging
import datetime as dt
import humanize

In [2]:
# Root-logger setup: append records of level DEBUG and above to
# log-metrics.txt, each prefixed with the time of day and its level name.
# A more verbose alternative format that was tried earlier:
#   '%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s'
logging.basicConfig(
    level=logging.DEBUG,
    filename='log-metrics.txt',
    filemode='a',
    datefmt='%H:%M:%S',
    format='%(asctime)s [%(levelname)s]: %(message)s',
)
# Write one record with an empty message (presumably a visual separator
# between runs in the appended log file).
logging.info("")

## Stats

| network_name | Number of nodes | Number of edges |
|---|---|---|
| soc-youtube.mtx | 495,957 | 1,936,748 |
| soc-twitter-follows.mtx | 404,719 | 713,319 |
| soc-academia.edges | 200,169 | 1,022,883 |
| soc-Slashdot0902.txt.gz | 82,168 | 582,533 |
| twitter_combined.txt.gz | 81,306 | 1,342,310 |
| socfb-OR.mtx | 63,392 | 816,886 |
| socfb-UF21.mtx | 35,123 | 1,465,660 |
| **`socfb-Rutgers89.mtx`** | 24,580 | 784,602 |
| **`wiki-Vote.txt.gz`** | 7,115 | 103,689 |
| **`socfb-Bowdoin47.mtx`** | 2,252 | 84,387 |
| **`socfb-Simmons81.mtx`** | 1,518 | 32,988 |
| **`socfb-Haverford76.mtx`** | 1,446 | 59,589 |



In [3]:
# Mark the beginning of a run in the log file (fixes the "Star process" typo).
logging.info("\nStart process ============================================================")

# Dataset selection: exactly one `network_name` assignment should be active.
# The other candidate files are kept commented out so they can be swapped in.
#network_name = "socfb-Simmons81.mtx"
#network_name = "socfb-Bowdoin47.mtx"
#network_name = "socfb-Haverford76.mtx"
#network_name = "fb1.txt"
#network_name = "fb2.txt"
#network_name = "fb3.txt"
#network_name = "fb4.txt"
#network_name = "wiki-Vote.txt.gz"
#network_name = "socfb-Rutgers89.mtx"

network_name = "soc-twitter-follows.mtx" # Number of nodes: 404719 Number of edges: 713319

#network_name = "twitter_combined.txt.gz"
#network_name = "soc-youtube.mtx"
#network_name = "soc-academia.edges"
#network_name = "socfb-OR.mtx"
#network_name = "soc-Slashdot0902.txt.gz"
#network_name = "socfb-UF21.mtx"


# Record which dataset this run is processing.
logging.info(network_name)

In [4]:
# Load the selected network from the data/ directory, converting node
# labels to integers.
G = nx.read_adjlist(f'data/{network_name}', nodetype=int)

# nx.info() was removed in NetworkX 3.0. Use it when it is available so the
# summary is unchanged on older releases, and fall back to str(G), which
# gives a one-line node/edge summary on releases that no longer have it.
original_info = getattr(nx, "info", str)(G)
print(original_info)

Name: 
Type: Graph
Number of nodes: 404719
Number of edges: 713319
Average degree:   3.5250


In [5]:
# Keep the summary of the full network in the log file as well.
logging.info('Original Network Info:\n%s', original_info)

In [6]:
dts = dt.datetime.now()

# An earlier draft only extracted components when nx.is_connected(G) was
# False; they are now requested unconditionally.
# nx.connected_components returns a lazy generator of node sets (one set per
# component), so `cc` can be iterated only once.
cc = nx.connected_components(G)
logging.info('Extract Connected Components')

In [7]:
def connected_components(graph):
    """
    Find the connected component of ``graph`` with the most edges and
    return statistics about it together with the component itself.

    The components are computed from ``graph`` on every call, so the
    function does not depend on any module-level state and can be called
    repeatedly. The expensive statistics (clustering, transitivity,
    eccentricity) are computed once, for the winning component only.

    Parameters
    ----------
    graph : networkx graph accepted by ``nx.connected_components``.

    Returns
    -------
    (subgraph_stats, new_graph)
        ``subgraph_stats`` is a dict with the keys ``size`` (edge count),
        ``order`` (node count), ``avg_cluster`` (average clustering
        coefficient), ``transitivity``, ``diameter`` and ``radius`` of the
        selected component. ``new_graph`` is the view returned by
        ``graph.subgraph`` for that component. When ``graph`` has no
        components the result is ``({'size': None}, None)``.
    """
    subgraph_stats = {'size': None}
    largest = None

    # Pass 1: cheap scan that only counts nodes and edges per component.
    for component in nx.connected_components(graph):
        size = len(graph.edges(component))
        # Strict comparison: on ties the first component encountered wins.
        if subgraph_stats['size'] is None or subgraph_stats['size'] < size:
            subgraph_stats['order'] = len(component)
            subgraph_stats['size'] = size
            largest = component

    if largest is None:
        # No components at all (empty graph): nothing to measure.
        return subgraph_stats, None

    # Pass 2: costly metrics, evaluated a single time for the winner.
    new_graph = graph.subgraph(largest)
    subgraph_stats['avg_cluster'] = nx.average_clustering(new_graph)
    subgraph_stats['transitivity'] = nx.transitivity(new_graph)

    # Diameter and radius are the largest and smallest node eccentricity.
    ecc_values = nx.eccentricity(new_graph).values()
    subgraph_stats['diameter'] = max(ecc_values)
    subgraph_stats['radius'] = min(ecc_values)

    return subgraph_stats, new_graph

In [None]:
%%time

# Time the search for the largest connected component and its statistics.
dts = dt.datetime.now()
stats, subgraph = connected_components(G)
# Elapsed time is "end minus start"; the previous "start minus end" produced
# a negative timedelta.
logging.info(f'Calculation of the largest sub graph: {humanize.precisedelta(dt.datetime.now() - dts)}')

In [None]:
# nx.info() was removed in NetworkX 3.0. Use it when it is available so the
# summary is unchanged on older releases, and fall back to str(subgraph)
# on releases that no longer provide it.
subgraph_info = getattr(nx, "info", str)(subgraph)
logging.info(f'Subgraph Info:\n{subgraph_info}')
print(subgraph_info)

In [None]:
# Record the statistics dict of the selected component in the log file.
logging.info('Subgraph Stats:\n%s', stats)

In [None]:
# Keep only the text before the first dot, so every extension is dropped
# (e.g. "wiki-Vote.txt.gz" becomes "wiki-Vote").
network_name, _, _ = network_name.partition('.')

In [None]:
# Save the selected component as a GEXF file in the working directory and
# note the generated file name in the log.
nx.write_gexf(subgraph, network_name + ".gexf")
logging.info("Generate %s.gexf", network_name)

In [None]:
# Read the exported file back in (presumably a round-trip sanity check).
TG = nx.read_gexf(network_name + ".gexf")

In [None]:
# nx.info() was removed in NetworkX 3.0. Use it when it is available and
# fall back to str(TG) on releases that no longer provide it.
print(getattr(nx, "info", str)(TG))

In [None]:
# Mark the end of the run for this dataset in the log file.
logging.info("Process Done for %s", network_name)