# Experiments

In [1]:
## reset defaults: put matplotlib's rcParams back to the library defaults
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

## imports
## helpers from the project's own `utils` package
from utils.utils import extract_zip, read_net_files
from utils.utils import extract_data_and_save
from utils.utils import extract_airport_descriptors
from utils.utils import make_histograms_pdf
## gc.collect() is called after the extraction steps further down
import gc

In [2]:
## Notebook banner: title and authors
print("Structural descriptors of complex networks")
print("Autors: Edison Bejarano - Eric Walzthöny")

Structural descriptors of complex networks
Autors: Edison Bejarano - Eric Walzthöny


### Issue 1: Average Degree

In [None]:
"""
Checking el average degree 
tu formula de:

    average_degree = "{:.4f}".format(
                                    float(edges / nodes)
                                    )  # Total Edges/Total Nodes=Average Degree

Esta mal, porque solo toma en cuenta el numero total de nodos
(asume que cada nodo tiene una sola conexion), y no las conexiones de cada nodo. 

Por ejemplo, el nodo, O2, abajo:     
    O1-O2-O3
       |
       O4
Hay 4 nodos, pero no todos tienen el degree igual
Nodo 01, 03, 04 - Degree: 1 (una conexion)
Nodo 02 - Degree: 3 (tres conexiones)

Fijate en la implementacion de abajo de "average_degree_graph", que se copio del libro.


"""

import numpy as np
import igraph
from igraph import mean


## loading a sample network
g = net_files['toy']['rb25.net'][0]

## Edges & Vertices (Nodes)
edges = g.ecount()
vertex = g.vcount()

## Checking Degrees of the network
## Tu implementacion 
edison_average_degree = edges / vertex  # Total Edges / Total Nodes =Average Degree
# Manual Method 1
avg_degree_calculated = sum(g.degree()) / vertex
## Manual Method 2
igraph_avg_degree_calculated = round(mean(g.degree()), 3)
## Manual Method 3
def average_degree_graph(graph:igraph.Graph) -> float:
    """
    Mean vertex degree, computed by visiting every vertex of the graph.

    Textbook definition (CN, page 127):

    Average Degree = 1/n * sum(degree of vertex in g)

    where n is the number of vertices returned by graph.vcount().
    """
    total_degree = 0
    ## add up the number of connections of each vertex, one vertex at a time
    for node in graph.vs():
        total_degree += node.degree()
    ## accumulated degree divided by the total number of vertices
    return total_degree / graph.vcount()

## Side-by-side comparison: raw counts first, then the four average-degree values
print(edges, vertex)
print(
    edison_average_degree,
    avg_degree_calculated,
    igraph_avg_degree_calculated,
    average_degree_graph(g),
)

### Issue 2: Transitivity / Average Clustering coefficient

In [None]:
## Chapter 7 complex networks !!!!!!!!!!!!!!!!!!

## The professor asks for this:
## Average clustering coefficient (average of the clustering coefficient of each node)
## This means:
## For each node / vertex: calculate the local transitivity
## then average those transitivities over all vertices

# local transitivity for each node in the graph
# - vertices are addressed by their igraph index (v.index); the 'id' attribute is
#   data loaded with the file (presumably the Pajek label - confirm) and is not
#   guaranteed to be a valid vertex index
# - values are kept at full precision; rounding every vertex first would let the
#   rounding errors add up in the average
trans_per_vertex = [g.transitivity_local_undirected(vertices=v.index) for v in g.vs()]

# average of all transitivities (rounded once, for reporting)
average_transitivty = round(np.mean(trans_per_vertex), 4)
manual_average = round(sum(trans_per_vertex) / g.vcount(), 4)

## check if they are the same
## (tolerance instead of exact float equality; equal_nan covers the case where
##  igraph reports NaN for some vertex - per its docs, the default for degree < 2)
assert np.isclose(average_transitivty, manual_average, atol=1e-4, equal_nan=True)
## these two are equal
print(manual_average)
print(average_transitivty)

## These two are the same as above
## this is the average transitivity of each node!
print(round(g.transitivity_avglocal_undirected(), 4))
## this as well
print(round(np.mean(g.transitivity_local_undirected()),4))

# the only one that differs
## this is the global transitivity of the whole network:
## it is computed from the connected triplets (w-v-u / u-v-w) of the graph as a
## whole, not by averaging the per-node values
print("This is the weird one")
print(round(g.transitivity_undirected(),4))


### Issue 3: Average Path Length

In [None]:
## Prof wants this:
## Average path length (average distance between all pairs of nodes)
## i.e. the mean of the shortest-path lengths over all pairs of distinct nodes
avg_path_len = g.average_path_length()

print(avg_path_len)

# shortest paths
## from the documentation:
## Calculates shortest path lengths for given vertices in a graph.
## Called without arguments it yields one row per vertex: entry [i][j] is the
## length of the shortest path from node i to node j.
sh_path = g.shortest_paths()

## Average the matrix over pairs of distinct, mutually reachable nodes only:
## - the diagonal (distance of a node to itself, always 0) is left out; np.mean
##   over the whole matrix would count those zeros and underestimate the result
## - unreachable pairs (infinite distance) are left out as well, which should
##   match igraph's default for average_path_length (TODO confirm for
##   disconnected networks)
n_nodes = g.vcount()
dist_matrix = np.asarray(sh_path, dtype=float).reshape(n_nodes, n_nodes)
distinct_pairs = ~np.eye(n_nodes, dtype=bool)
reachable_pairs = distinct_pairs & np.isfinite(dist_matrix)

## mean shortest-path length; comparable with avg_path_len printed above
print(dist_matrix[reachable_pairs].mean())

### Testing

In [4]:
## Variable configuration: paths used by the extraction / loading cell below
## Path to the zip file (relative to the notebook's working directory)
ZIP_PATH = './A1-networks.zip'
## Directory the archive is extracted into and the .net files are read from
OUTPUT = './A1-networks/'   

In [None]:
## Extracting the zip-file at ZIP_PATH into the OUTPUT directory
print("Extracting zip file...\n")
extract_zip(ZIP_PATH, OUTPUT)

## Reading all the pajek files (.net files) from OUTPUT
## `net_files` is indexed later as net_files[<category>][<file name>][0]
print("Loading Pajek files into memory...\n")
net_files = read_net_files(OUTPUT, verbosity=True)



In [None]:
##### Part A: Numerical Descriptors of Networks ######
## Run the descriptor extraction over the loaded networks; save_csv=True asks
## the helper to store the result as a CSV as well
print("Initializing Numerical Descriptor Extraction of Pajek files...\n")
df = extract_data_and_save(
    net_files,
    "new_Descriptors",
    save_csv=True,
)
## progress message
print("Succesfully saved the Numerical Descriptors\n")
## trigger a garbage-collection pass before the next step
gc.collect()

In [None]:
##### Part B: Numerical Descriptors of Real Network - AIRPORT ######
## Pick the airport network out of the files already held in memory
print("Initializing the Airport Numerical Description Extraction....\n")
airport = net_files['real']['airports_UW.net'][0]
## Airports reported on in the write-up
airport_to_calculate = [
    "PAR", "LON", "FRA", "AMS", "MOW",
    "NYC", "ATL", "BCN", "WAW", "CHC",
    "DJE", "ADA", "AGU", "TBO", "ZVA",
]

## Extract the descriptors for the airports listed above
air_port = extract_airport_descriptors(
    graph=airport,
    file_name="NEW_Airport_Descriptor",
    airport_list=airport_to_calculate,
    save_csv=True,
)
print("The Airport Numerical Descriptors were extracted correctly! \n")

## trigger a garbage-collection pass before the next step
gc.collect()

In [None]:
### PART C: Histograms and CCDF
## Build the histograms with 15 bins via the project helper.
## NOTE(review): the helper takes no data argument here, so it presumably picks up
## the files written by the earlier parts - confirm against utils.utils
make_histograms_pdf(bins=15)