# 不要再运行这个笔记本了
# 这个笔记本的意义就是说那谁说的确定维数办法不可行

In [1]:
args = {
    'input': './football',
    'output': './emb',
    'start_dim': 1,
    'end_dim': 200,
    'step': 1,
    'length': 10,
    'num_walks': 20,
    'window_size': 5,
    'iter': 10,
    'workers': 8,
    'p': 1.0,
    'q': 1.0,
    'weighted': False,
    'directed': False
}

In [2]:
# This is the function for Laplacian Eigenmap using Cupy. The presence of GPU is required.

import numpy as np
import networkx as nx
import cupy as cp

def lap_cupy(graph, dim):
    """
    Compute the Laplacian embedding of a graph using CuPy.

    Parameters:
    graph (networkx.classes.graph.Graph): The input graph.
    dim (int): The dimension of the embedding.

    Returns:
    numpy.ndarray: The Laplacian embedding of the graph.
    """
    # Check inputs
    assert isinstance(graph, nx.Graph), "Input graph must be a NetworkX graph."
    assert isinstance(dim, int) and dim > 0, "Input dim must be a positive integer."
    assert dim < graph.number_of_nodes(), "Input dim must be less than the number of nodes in the graph."

    # Convert the adjacency matrix of the graph to a CuPy array
    A = cp.asarray(nx.adjacency_matrix(graph, nodelist=graph.nodes(), weight='weight').toarray(), dtype=cp.float64)

    # Compute L1 normalization along axis 1 (rows)
    row_sums = cp.linalg.norm(A, ord=1, axis=1)
    A /= row_sums.reshape(-1, 1)

    # Compute the eigenvalues and eigenvectors of I_n - A
    I_n = cp.eye(graph.number_of_nodes())
    w, v = cp.linalg.eigh(I_n - A)

    # Sort the eigenvectors by the real part of the eigenvalues
    v = v[:, cp.argsort(w.real)]

    # Return the embedding
    return v[:, 1:(dim+1)].get().real  # Explicitly convert to NumPy array using .get()

In [3]:
import numpy as np
import networkx as nx
import node2vec
from gensim.models import Word2Vec
from scipy.spatial.distance import cdist
from scipy import optimize
import math
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def read_graph(args):
    '''
    Reads the input network in networkx.
    '''
    if args["weighted"]:
        G = nx.read_edgelist(args["input"], nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(args["input"], nodetype=int, create_using=nx.DiGraph())
        for edge in G.edges():
            G[edge[0]][edge[1]]['weight'] = 1
    #G = nx.read_edgelist(file_name, nodetype=int)
    if not args["directed"]:
        G = G.to_undirected()
    return G
    
def fitting_func(dims,s,a,L):  
      return s/np.power(dims,a) + L

In [4]:
def identify_optimal_dim(embedding_dims, loss):
    '''
    Identify the optimal dimension range and compute the curve fitting parameter for graph.
    '''  
    (s,a,l),cov = optimize.curve_fit(fitting_func, embedding_dims,loss)
    fit_values = (fitting_func(np.array(embedding_dims),s,a,l))
    MSE = ((np.array(loss)-np.array(fit_values))**2).mean()
    opt = np.power((s/0.05),1/a)
    print('the optimal dimension at 0.05 accuracy level is {}'.format(int(math.ceil(opt))))
    print('the MSE of curve fitting is {}'.format(MSE))

def cal_cosine_matrices(G, args):
    '''
    Compute the cosine distance between every node pair over different embedding dimensions.
    '''
    norm_loss = []
    node_num = len(G.nodes())
    if node_num < args['end_dim']:
        args['end_dim'] = node_num-1
    embedding_dims = list(range(args["start_dim"],args["end_dim"],args["step"]))
    if node_num < 500:
        embedding_dims.insert(0,node_num-1)
        print('graph size smaller than the default end dimension, thus has been automatically set to {}'.format(node_num))
    else:
        embedding_dims.insert(0,500)  
    #cosine_matrices = np.zeros((len(embedding_dims),node_num,node_num)) 
    for _index, dim in enumerate(embedding_dims):
        # print (dim)
        # model = Word2Vec(walks, vector_size=dim,window=args["window_size"], min_count=0, sg=1, workers=args["workers"], epochs=args["iter"])    
        #emb_matrix = np.zeros((node_num,dim))      
        emb_matrix= lap_cupy(G, dim)#model[str(node)] 
        emb_matrix = emb_matrix - np.mean(emb_matrix,axis=0) 
        cosine_matrix = 1 - cdist(emb_matrix,emb_matrix,'cosine')
        if _index == 0:
            benchmark_array = np.array(upper_tri_masking(cosine_matrix))
            #np.savez_compressed('./pic/conect_data/npz/{}'.format(str.split(args.input,'/')[6]),benchmark_array)      
        else:
            dim_array = np.array(upper_tri_masking(cosine_matrix)) 
            loss = np.linalg.norm((dim_array-benchmark_array),ord=1)
            norm_loss.append(loss/len(dim_array))
    return embedding_dims[1:],norm_loss
    
def upper_tri_masking(A):
    '''
    Masking the upper triangular matrix. 
    '''
    m = A.shape[0]
    r = np.arange(m)
    mask = r[:,None] < r
    return A[mask]
  
def cal_embedding_distance(args):
    '''
    The overall random walk, graph embedding and cosine distance calculation process.
    '''
    nx_G = read_graph(args)
    
    dims, loss = cal_cosine_matrices(nx_G, args)
    print("this is a flag")
    print(dims,loss)
    plt.plot(dims,loss)
    plt.savefig('./a.png')
    identify_optimal_dim(dims, loss)
    #return cosine_matrices

In [5]:
cal_embedding_distance(args)
# The result is not good. Because the football network has 115 nodes only. 

graph size smaller than the default end dimension, thus has been automatically set to 115


  A = cp.asarray(nx.adjacency_matrix(graph, nodelist=graph.nodes(), weight='weight').toarray(), dtype=cp.float64)


this is a flag
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113] [0.9999794874681333, 0.6414235054151521, 0.4893476953973873, 0.40796127845208713, 0.35632502789504006, 0.31286178003880377, 0.2727480166982685, 0.2434671627843103, 0.21524337908175747, 0.19761682191397204, 0.18164523369626917, 0.17116079982430088, 0.1661546206734942, 0.1608383715274257, 0.15329464858962322, 0.14911335812230214, 0.14387385330526264, 0.1412756520001435, 0.13858196846789395, 0.13578592245602863, 0.13245010050786635, 0.12945864536003035, 0.12680647352205948, 0.1242875332758748, 0.12220864745331075, 0.11972993