In [3]:
# 复现DeepWalk
# 2022.07.25
# referece github links:https://github.com/dsgiitr/graph_nets/blob/master/DeepWalk/DeepWalk.py

In [4]:
import os
project_path = os.getcwd()

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import random

# DeepWalk

In [5]:
adj_list = [[1,2,3],[0,2,3],[0,1,3],[0,1,2],[5,6],[4,6],[4,5],[1,3]]
size_vertex = len(adj_list) # number of vertices

In [6]:
adj_list

[[1, 2, 3], [0, 2, 3], [0, 1, 3], [0, 1, 2], [5, 6], [4, 6], [4, 5], [1, 3]]

## variables

In [7]:
w = 3 # window size
d = 3 # embedding size
y = 200 # walks per ventext
t = 6 # walk length
lr = 0.025 # learning rate

In [8]:
v = [0,1,2,3,4,5,6] # labels of available vertices

## random_walk

In [9]:
adj_list[0]

[1, 2, 3]

In [10]:
def RandomWalk(node, t):
    walk = [node]

    for i in range(t - 1): # 控制步长循环次数
        list_length = len(adj_list[node])-1 # node邻接节点个数
        node_index = random.randint(0, list_length) # 随机生成的索引值，randit生成[low,high]闭区间内的一个整数值
        node = adj_list[node][node_index]  # 取一个邻居节点
        walk.append(node)
    return walk

In [11]:
RandomWalk(4,t)

[4, 5, 4, 5, 4, 5]

## model

In [12]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.phi  = nn.Parameter(torch.rand((size_vertex, d), requires_grad=True)) # size_vertex是one-hot向量维度，d是word embedding维度
        self.phi2 = nn.Parameter(torch.rand((d, size_vertex), requires_grad=True))
        
        
    def forward(self, one_hot):
        hidden = torch.matmul(one_hot, self.phi)
        out    = torch.matmul(hidden, self.phi2)
        return out

model = Model()

In [14]:
# def One_Hot(wvi,v):
#     # wvi表示random walk生成的node sequence;v表示node sequence
#     from torch.nn import functional
#     label = torch.tensor(wvi)  # 将列表转换为tensor类型，列表中元素代表的是索引值
#     num_class = len(set(v)) # 代表节点个数
#     label2one_hot = functional.one_hot(label, num_classes=num_class)
#     print("LongTensor:", label2one_hot)       #LongTensor类型
#     print("ndarray:", label2one_hot.numpy())  # ndarray 类型
#     print("list:", label2one_hot.numpy().tolist())  # list 类型
#     return label2one_hot

## skip_gram

In [49]:
# def skip_gram(wvi,  w):
#     # wvi表示random walk生成的node sequence；w = 3 window size
#     for j in range(len(wvi)):
#         for k in range(max(0,j-w) , min(j+w+1, len(wvi))): # 确定中心点j左右范围，k遍历中心点j的左右位置
# #             print(j,' ',k)
#             #generate one hot vector
#             one_hot          = torch.zeros(size_vertex)
#             one_hot[wvi[j]]  = 1
            
#             out              = model(one_hot)
#             loss             = torch.log(torch.sum(torch.exp(out))) - out[wvi[k]]
# #             print(loss)
#             loss.backward()
            
#             for param in model.parameters():
# #                 print(param)
#                 param.data.sub_(lr*param.grad)
#                 param.grad.data.zero_()

In [52]:
def skip_gram(wvi, w):
    # wvi表示random walk生成的node sequence；w = 3 window size
    for j in range(len(wvi)):

        # # 计算上下文词数量C
        # c = min(j + w + 1, len(wvi)) - max(0, j - w)

        # 损失函数loss=每个panel的第ti=1那个位置误差error之和
        # generate one hot vector
        one_hot = torch.zeros(size_vertex)
        one_hot[wvi[j]] = 1  # 中心词输入one-hot向量
        # 未激活的输出向量
        out = model(one_hot)

        loss = 0
        # for c-th panel
        for k in range(max(0, j - w), min(j + w + 1, len(wvi))):  # 确定中心点j左右范围，k遍历中心点j的左右位置
            # 损失函数e=每个panel的第j个位置误差之和
            error = torch.log(torch.sum(torch.exp(out))) - out[wvi[k]]
            loss += error
        loss.backward()

        for param in model.parameters():
            param.data.sub_(lr * param.grad)
            param.grad.data.zero_() # param.grad就是每次迭代计算的误差error，需要每次重置为0

In [53]:
for i in range(y):
    random.shuffle(v)
    for vi in v:
        wvi=RandomWalk(vi,t)
        skip_gram(wvi, w)

In [54]:
print(model.phi)

Parameter containing:
tensor([[ 0.9548,  0.5674,  0.2238],
        [ 0.5477, -0.1744,  0.9595],
        [ 0.5044,  0.5926,  0.7232],
        [ 0.9641, -0.0202,  0.3248],
        [-0.8217,  1.0645, -0.5738],
        [-0.6344,  0.7642, -0.9929],
        [-1.0496,  0.7286, -0.6431],
        [ 0.1929,  0.9062,  0.3201]], requires_grad=True)


In [45]:
# wvi=RandomWalk(0,t) # wvi表示random walk生成的node sequence
# skip_gram(wvi, w)

In [13]:
for param in model.state_dict():
    print(param, '\t', model.state_dict()[param].size())

phi 	 torch.Size([8, 3])
phi2 	 torch.Size([3, 8])
