In [1]:
from mygraph import MyGraph
from helpers import *
import pandas as pd

# Importing Graph Data from CSV File

In [2]:
# Read the raw edge list of the company-pages graph and preview its first rows.
EDGES_CSV = "fb-pages-company-edges.csv"
d = pd.read_csv(EDGES_CSV)
d.head(3)

Unnamed: 0,node_1,node_2
0,0,2243
1,0,12084
2,0,6169


In [3]:
# Each row of `d` is one edge. Unpacking into exactly two names keeps the
# original guarantee that the frame carries two columns and nothing else.
edge_list = [(src, dst) for src, dst in d.values]

# Derive the vertex count from the data instead of hard-coding 14113, so the
# cell keeps working when the input file changes. This assumes node ids are
# the integers 0..max_id and that the largest id appears in at least one edge
# (holds for this dataset: 14113 nodes, none of degree 0).
num_nodes = int(d.values.max()) + 1 if len(d) else 0

myg = MyGraph()
# Add the vertices in increasing id order before any edge is inserted
# (presumably so MyGraph's vertex order follows the node id; confirm in mygraph).
for node_id in range(num_nodes):
    myg.add_vertex(node_id)
myg.from_edge_list(edge_list)  # Constructing the graph using the list of its edges

In [4]:
%%time
# Summary statistics of the original graph, printed one entry per line.
myg_stats = myg.graph_stats()
print_dict(myg_stats)

num_nodes            14113 
num_edges            52126 
nodes_degrees        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 75, 76, 77, 78, 79, 81, 82, 83, 84, 86, 88, 90, 92, 97, 98, 99, 102, 103, 109, 110, 112, 129, 144, 145, 147, 160, 171, 179, 185, 194, 198, 212, 215] 
average_degree       7.386948203783746 
Wall time: 1min 13s


In [5]:
%%time
# Modifying the graph of the Facebook pages of companies: 500 edges are
# requested for removal and 500 for addition, with seed=1 passed to the helper
# (presumably so the random selection is reproducible).
myg_modified = remove_add_random_edges(
    myg,
    num_remove=500,
    num_add=500,
    seed=1,
)

Wall time: 2min 25s


In [6]:
# Persist the edge list of the modified graph as CSV (no index column).
modified_edges = myg_modified.get_edge_list()
df = pd.DataFrame(modified_edges)
df.to_csv("fb_co_pages_modified.csv", index=False)

In [7]:
%%time
# Summary statistics of the modified graph, for comparison with the original.
myg_modified_stats = myg_modified.graph_stats()
print_dict(myg_modified_stats)

num_nodes            14113 
num_edges            52126 
nodes_degrees        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 87, 89, 90, 92, 94, 95, 98, 102, 108, 109, 111, 128, 129, 140, 143, 146, 158, 170, 179, 184, 194, 196, 212, 213] 
average_degree       7.386948203783746 
Wall time: 1min 14s


# Computing the PageRank of the modified graph using NetworkX

In [8]:
# Build, via the project helper copy_my_graph, the graph object that is handed
# to NetworkX's pagerank in the next cell.
nxg_modified = copy_my_graph(myg_modified)

In [9]:
%%time
# The array of pagerank of vertices of the graph, shaped as a single column.
# Rows follow the iteration order of the dict returned by pagerank
# (presumably the node order of nxg_modified; verify it matches the node ids).
pagerank_by_node = nx.pagerank(nxg_modified)
pagerank_values = list(pagerank_by_node.values())
np_target_page = np.array(pagerank_values).reshape(-1, 1)
np_target_page

Wall time: 924 ms


array([[9.98297755e-05],
       [9.02870563e-05],
       [5.44038647e-05],
       ...,
       [2.86887214e-05],
       [4.16821338e-05],
       [3.86940050e-05]])

In [10]:
# Saving the pagerank as a numpy array into a file.
# allow_pickle=False makes the save fail rather than silently pickle an
# object-dtype array. The former fix_imports=True argument is dropped: it was
# the default, and newer NumPy releases deprecate passing it at all.
np.save("fb_co_pages_modified_pageranks.npy", np_target_page, allow_pickle=False)

# The NDFC matrix representation

In [11]:
# Starting points passed to NDFC_matrix_rep below.
# NOTE(review): these are computed from the original graph `myg`, while the
# matrices are built from `myg_modified`; confirm this is intended.
sp_1 = myg.starting_points(
    ratio=1.3,
    max_length=50,
    starting_length=1,
)
sp_1

[1, 2, 3, 4, 6, 8, 11, 15, 21, 29, 39, 52, 69, 92, 122, 161, 211]

In [12]:
%%time
# Producing the order 5 NDFC matrix rep. of nodes of the modified graph
# w.r.t. the starting points sp_1, then showing the first entry as a sample.
mats_1 = NDFC_matrix_rep(
    myg_modified,
    starting_points=sp_1,
    radius=5,
    decimals=6,
)
mats_1[0]

Wall time: 8min 34s


array([[1.      , 0.      , 0.      , 4.      , 0.      , 0.      ,
        2.      , 2.      , 0.      , 1.      , 0.      , 0.      ,
        0.      , 0.      , 0.      , 0.      , 0.      ],
       [0.1     , 0.1     , 1.2     , 1.7     , 0.7     , 1.8     ,
        1.9     , 2.4     , 0.3     , 0.7     , 0.3     , 0.3     ,
        0.1     , 0.1     , 0.      , 0.2     , 0.      ],
       [0.79661 , 0.661017, 0.745763, 1.711864, 1.711864, 1.881356,
        2.491525, 2.338983, 2.135593, 2.711864, 1.033898, 1.152542,
        0.508475, 0.389831, 0.271186, 0.254237, 0.186441],
       [0.33038 , 0.574684, 0.660759, 1.593671, 1.711392, 2.146835,
        2.483544, 2.694937, 2.21519 , 1.732911, 1.172152, 1.220253,
        0.516456, 0.317722, 0.159494, 0.38481 , 0.105063],
       [0.179433, 0.3157  , 0.360982, 0.833051, 0.879391, 1.130343,
        1.158062, 1.307025, 1.134363, 0.794118, 0.573212, 0.455353,
        0.298138, 0.178163, 0.14854 , 0.12738 , 0.070038],
       [0.128949, 0.24779

In [13]:
# Dimensions of the array returned by NDFC_matrix_rep.
mats_1.shape

(14113, 6, 17)

In [14]:
# Saving the NDFC matrix representation as a numpy array into a file.
# allow_pickle=False makes the save fail rather than silently pickle an
# object-dtype array. The former fix_imports=True argument is dropped: it was
# the default, and newer NumPy releases deprecate passing it at all.
np.save("fb_co_pages_modified_NDFC_matrix_r1-30_sta1_max50_rad5.npy", mats_1, allow_pickle=False)