In [None]:
import gdown
import tensorflow as tf
import numpy as np
import os
import random
import networkx as nx
import csv

!mkdir datasets
#!gdown https://drive.google.com/uc?id=1v03XWRternGLDpRfKbRGoMiVX3dpOW3G -O datasets/facebook_edges.txt

#############################################################################################
# Douban DATASET DOWNLOADS source: http://datasets.syr.edu/pages/datasets.html
#############################################################################################
# Download blogcatalog dataset edgelist in cvs format
!gdown https://drive.google.com/uc?id=1ssjgKF5WpiXcIk7DfF6BXwPoWkqr5rOS -O datasets/douban_edges.csv

#############################################################################################
# Youtube DATASET DOWNLOADS  source: http://datasets.syr.edu/pages/datasets.html
#############################################################################################
#!gdown https://drive.google.com/uc?id=12aGrbOZqVMfOP46X8lj5qwqQui4kbMjZ -O datasets/youtube_edges.csv


!gdown https://drive.google.com/uc?id=1QwaC2pz6wC8QGAA1N7208SxEzdPfkn3S -O GraphEmbedding.zip
!unzip GraphEmbedding.zip

Downloading...
From: https://drive.google.com/uc?id=1ssjgKF5WpiXcIk7DfF6BXwPoWkqr5rOS
To: /content/datasets/douban_edges.csv
8.29MB [00:00, 38.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1QwaC2pz6wC8QGAA1N7208SxEzdPfkn3S
To: /content/GraphEmbedding.zip
100% 1.06M/1.06M [00:00<00:00, 70.3MB/s]
Archive:  GraphEmbedding.zip
   creating: GraphEmbedding/
  inflating: __MACOSX/._GraphEmbedding  
  inflating: GraphEmbedding/.DS_Store  
  inflating: __MACOSX/GraphEmbedding/._.DS_Store  
  inflating: GraphEmbedding/LICENSE  
  inflating: __MACOSX/GraphEmbedding/._LICENSE  
   creating: GraphEmbedding/pics/
  inflating: __MACOSX/GraphEmbedding/._pics  
  inflating: GraphEmbedding/README.md  
  inflating: __MACOSX/GraphEmbedding/._README.md  
  inflating: GraphEmbedding/setup.py  
  inflating: __MACOSX/GraphEmbedding/._setup.py  
  inflating: GraphEmbedding/.gitignore  
  inflating: __MACOSX/GraphEmbedding/._.gitignore  
   creating: GraphEmbedding/examples/
  inflating: __MACOSX/G

In [None]:
# Dataset to embed. read_data() handles: facebook, blogcatalog, douban, youtube, flickr.
# NOTE(review): only the douban edge list is downloaded by the setup cell as it stands.
dataset = "douban"

# Fix every random number generator to one seed for reproducibility.
seed_value = 122
# Python reads PYTHONHASHSEED at interpreter start-up, so this assignment only
# affects processes launched after this point, not the running kernel.
os.environ["PYTHONHASHSEED"] = str(seed_value)
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
  seed_fn(seed_value)

In [None]:
def read_data(dataset):
  """Load the edge list of ``dataset`` and return the graph with basic statistics.

  Args:
    dataset: One of "facebook", "blogcatalog", "douban", "youtube" or "flickr".
      The matching edge-list file must already exist under ``./datasets``.

  Returns:
    A tuple ``(G, nodes, edges, num_nodes, num_edges)``: the networkx graph,
    the lists of its nodes and edges, and the lengths of those two lists.

  Raises:
    ValueError: If ``dataset`` is not one of the supported names. (Previously
      the function only printed a message and then failed on the unbound
      graph variable.)
  """
  if dataset == "facebook":
    G = nx.read_edgelist("./datasets/facebook_edges.txt")
  elif dataset == "blogcatalog":
    G = nx.read_edgelist('./datasets/blogcatalog_edges.csv', delimiter=',', nodetype=str, encoding="utf-8")
  elif dataset == "douban":
    G = nx.read_edgelist('./datasets/douban_edges.csv', delimiter=',', nodetype=str, encoding="utf-8")
    # Renumber nodes to 0..N-1, then turn the labels back into strings so the
    # node labels are string-typed like those of the other file-based datasets.
    G = nx.relabel.convert_node_labels_to_integers(G, first_label=0, ordering="sorted")
    mapping = {v: str(v) for v in G.nodes()}
    G = nx.relabel.relabel_nodes(G, mapping)
  elif dataset == "youtube":
    G = nx.read_edgelist('./datasets/youtube_edges.csv', delimiter=',', nodetype=str, encoding="utf-8")
    # Unlike the douban branch, labels stay integers here.
    G = nx.relabel.convert_node_labels_to_integers(G, first_label=0, ordering="sorted")
  elif dataset == "flickr":
    G = nx.read_edgelist("./datasets/flickr_edges.txt")
  else:
    raise ValueError(
        "Invalid dataset name: {!r} (expected one of facebook, blogcatalog, "
        "douban, youtube, flickr)".format(dataset))

  nodes = list(G.nodes())
  edges = list(G.edges())
  num_nodes = len(nodes)
  num_edges = len(edges)
  print("Number of nodes", num_nodes)
  print("Number of edges", num_edges)
  return G, nodes, edges, num_nodes, num_edges

In [None]:
from GraphEmbedding.ge.models import Node2Vec, DeepWalk, SDNE, Struc2Vec, LINE


def _embeddings_to_matrix(embedding_vectors):
  """Stack a ``{node_label: vector}`` mapping into an array indexed by integer node id.

  Row ``i`` of the result holds the vector of the node whose label converts to
  the integer ``i``. Labels may be ints or numeric strings. Ids with no
  embedding (e.g. id 0 when labels start at 1) get an all-zero row.

  Raises:
    ValueError: If the mapping is empty or a label is not an integer.
  """
  if not embedding_vectors:
    raise ValueError("No embedding vectors were produced")
  by_id = {int(node): np.asarray(vec) for node, vec in embedding_vectors.items()}
  sample = next(iter(by_id.values()))
  # max id + 1 rows so the highest-numbered node is included.
  matrix = np.zeros((max(by_id) + 1,) + sample.shape, dtype=sample.dtype)
  for node_id, vec in by_id.items():
    matrix[node_id] = vec
  return matrix


def train_embedding(G, params={}, embedding_method="node2vec", num_nodes=4039, dataset="facebook"):
  """Train a node embedding on ``G`` and save it to ``<dataset>_<embedding_method>.npy``.

  Args:
    G: Graph handed to the GraphEmbedding model constructors.
    params: Optional per-method overrides. Keys read: ``q``, ``p`` and
      ``window_size`` (node2vec), ``layer_config`` (sdne), ``order`` (line).
      Missing keys fall back to the defaults below. The default dict is only
      read, never mutated.
    embedding_method: "node2vec", "deepwalk", "sdne", "struc2vec" or "line".
    num_nodes: Node count, used only for the default SDNE layer sizes.
    dataset: Dataset name used as the output file prefix.

  Raises:
    ValueError: If ``embedding_method`` is unknown or no embeddings are returned.

  The saved array is ordered by integer node id (row ``i`` is node ``i``).
  """
  print("Params for Embeddings: ", params, " with embedding type: ", embedding_method)
  # Treat None like "no overrides".
  options = params or {}

  if embedding_method == "node2vec":
    # default params from paper implementation
    # NOTE(review): 80 and 10 are presumably walk length and walks per node - confirm against GraphEmbedding.
    model = Node2Vec(G, 80, 10, q=options.get('q', 1), p=options.get('p', 1))
    model.train(window_size=options.get('window_size', 10))
  elif embedding_method == "deepwalk":
    model = DeepWalk(G, 80, 40) # parameters as in the paper
    model.train()
  elif embedding_method == "sdne":
    # same hidden sizes as in paper for arxiv GR-QC as it has similar number of nodes as facebook
    layer_config = options.get("layer_config", [int(num_nodes*0.1), 128])
    model = SDNE(G, hidden_size=layer_config)
    model.train(epochs=40, batch_size=512, verbose=0)
  elif embedding_method == "struc2vec":
    model = Struc2Vec(G)
    model.train()
  elif embedding_method == "line":
    order = options.get("order", "second")
    # NOTE(review): size is halved for order "all", presumably because both
    # orders are concatenated by the library - confirm.
    embedding_size = 64 if order == "all" else 128
    model = LINE(G, embedding_size=embedding_size, order=order)
    model.train(epochs=50, batch_size=512, verbose=2)
  else:
    raise ValueError("Unknown embedding method: {!r}".format(embedding_method))

  embedding_vectors = model.get_embeddings()
  embedding_vectors_res = _embeddings_to_matrix(embedding_vectors)
  np.save(dataset+"_"+embedding_method+".npy", embedding_vectors_res)
  print("EMBEDDING VECTORS SAVED")

In [None]:
# Embedding methods to train in this run; "node2vec" and "sdne" can be added to the list.
embedding_techniques = ["line"]
G, nodes, edges, num_nodes, num_edges = read_data(dataset)
# Train and save one embedding file per selected method, with default hyper-parameters.
for emb in embedding_techniques:
  train_embedding(G, params={}, embedding_method=emb, num_nodes=num_nodes, dataset=dataset)

Number of nodes 154907
Number of edges 327094
Params for Embeddings:  {}  with embedding type:  line




Epoch 1/50
959/959 - 374s - loss: 0.5636
Epoch 2/50
959/959 - 368s - loss: 0.2593
Epoch 3/50
959/959 - 368s - loss: 0.2184
Epoch 4/50
959/959 - 368s - loss: 0.1946
Epoch 5/50
959/959 - 371s - loss: 0.1725
Epoch 6/50
959/959 - 375s - loss: 0.1487
Epoch 7/50
959/959 - 376s - loss: 0.1257
Epoch 8/50
959/959 - 371s - loss: 0.1037
Epoch 9/50
959/959 - 369s - loss: 0.0839
Epoch 10/50
