In [2]:
'''
2023.01.12，复现HAN(Heterogeneous Graph Attention Network)
dependencies:
    torch-1.10.0
    numpy-1.22.0
    networkx-2.8.8
    scipy-1.7.3
'''

'\n2023.01.12，复现HAN(Heterogeneous Graph Attention Network)\ndependencies:\n    torch-1.10.0\n    numpy-1.22.0\n    networkx-2.8.8\n    scipy-1.7.3\n'

In [3]:
import numpy as np
import pandas as pd

import torch
import os
# Remember the kernel's current working directory as the project path.
project_path = os.getcwd()

In [4]:
project_path

'D:\\PycharmProjects\\GNN Algorithms\\HAN'

# utils

## data process

In [5]:
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys

### adj_to_bias

In [6]:
"""
 Prepare adjacency matrix by expanding up to a given neighbourhood.
 This will insert loops on every node.
 Finally, the matrix is converted to bias vectors.
 Expected shape: [graph, nodes, nodes]
"""

'\n Prepare adjacency matrix by expanding up to a given neighbourhood.\n This will insert loops on every node.\n Finally, the matrix is converted to bias vectors.\n Expected shape: [graph, nodes, nodes]\n'

In [48]:
# 邻接矩阵转换为偏置矩阵
def adj_to_bias(adj, sizes, nhood=1):
    """Turn adjacency matrices into additive attention-bias matrices.

    For every graph ``g`` the matrix ``adj[g] + I`` (adjacency plus
    self-loops) is multiplied by itself ``nhood`` times, so entry ``(i, j)``
    becomes positive when ``j`` can be reached from ``i`` in at most
    ``nhood`` hops. Positive entries inside the leading
    ``sizes[g] x sizes[g]`` sub-matrix are then clamped to ``1.0``; entries
    outside that sub-matrix are left as computed.

    Args:
        adj: array indexed as ``[graph, node, node]``.
        sizes: per-graph number of leading rows/columns to binarise.
        nhood: number of hops to expand the neighbourhood by.

    Returns:
        A float array with the shape of ``adj`` holding
        ``-1e9 * (1.0 - matrix)``: ``0`` where the binarised matrix is 1 and
        ``-1e9`` where it is 0.
    """
    num_graphs = adj.shape[0]
    identity = np.eye(adj.shape[1])  # loop-invariant, built once
    matrix = np.empty(adj.shape)
    for g in range(num_graphs):
        with_loops = adj[g] + identity
        reach = identity
        for _ in range(nhood):
            reach = np.matmul(reach, with_loops)
        matrix[g] = reach
        # Basic slicing yields a view, so the masked assignment writes
        # straight into `matrix` without a Python-level double loop.
        size = sizes[g]
        block = matrix[g, :size, :size]
        block[block > 0.0] = 1.0
    return -1e9 * (1.0 - matrix)

### loading_file

In [50]:
# load file
def loading_file(filename):
    """Parse an index file that stores one integer per line.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with the integer found on each non-blank line, in file order.

    Raises:
        ValueError: if a non-blank line is not a valid integer.
    """
    # The previous version stripped the accumulator list instead of the
    # current line and never closed the file handle.
    with open(filename) as handle:
        # Blank lines (e.g. a trailing newline at end of file) are skipped.
        return [int(line.strip()) for line in handle if line.strip()]

### sample_mask

In [52]:
# 生成掩码bool数组
def sample_mask(index, l):
    """Build a boolean mask that is True at the given positions.

    Args:
        index: position(s) to mark; anything NumPy accepts as an index.
        l: length (or shape) of the mask to create.

    Returns:
        A boolean ``numpy.ndarray`` of shape ``l`` that is True at ``index``
        and False everywhere else.
    """
    # Use the builtin `bool`: the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    mask = np.zeros(l, dtype=bool)
    mask[index] = True
    return mask

### sparse_elements

In [56]:
# 提取函数
def extract_elements(mx):
    """Split a sparse matrix into its coordinates, values and shape.

    The matrix is converted to COO format first when it is not already COO.

    Returns:
        A tuple ``(coo_index, values, shape)`` where ``coo_index`` has one
        ``(row, col)`` pair per stored element, ``values`` is the matching
        data array and ``shape`` is the matrix shape.
    """
    coo = mx if sp.isspmatrix_coo(mx) else mx.tocoo()
    # Stack the row indices over the column indices, then flip so that each
    # row of the result is one (row, col) pair.
    index_pairs = np.vstack((coo.row, coo.col)).T
    return index_pairs, coo.data, coo.shape

In [59]:
# 提取sparse稀疏矩阵元素
def sparse_elements(sparse_mx):
    """Convert a sparse matrix, or a list of them, to tuple representation.

    A single matrix is returned as the tuple produced by
    ``extract_elements``. A list is updated in place, element by element,
    and the same list object is returned.
    """
    if not isinstance(sparse_mx, list):
        return extract_elements(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = extract_elements(matrix)
    return sparse_mx

## layers

In [68]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### attn_head

In [98]:
def attn_head(features, out_sz, bias_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False,
              return_coef=False):
    """Apply one graph-attention head to a batch of node features.

    The features are projected to ``out_sz`` dimensions, pairwise attention
    logits are formed as ``f_1 + f_2^T``, masked with ``bias_mat``,
    normalised with a softmax over the last dimension, and used to
    aggregate the projected features.

    Args:
        features: float tensor with nodes on dim -2 and feature channels on
            dim -1 (the matmul and the ``shape[-1]`` residual check rely on
            this layout) -- presumably ``(batch, nodes, in_dim)``; confirm
            against the caller.
        out_sz: number of output feature channels.
        bias_mat: additive mask broadcastable to the ``(nodes, nodes)``
            logits; assumed to be a tensor on the same device -- TODO
            confirm the caller converts the NumPy bias first.
        activation: callable applied to the aggregated output.
        in_drop: probability of dropping an input / projected feature.
        coef_drop: probability of dropping an attention coefficient.
        residual: add a skip connection from ``features`` to the output
            (through a linear projection when channel counts differ).
        return_coef: also return the attention coefficients.

    Returns:
        ``activation(ret)``, or ``(activation(ret), coefs)`` when
        ``return_coef`` is true.

    NOTE(review): to keep this a plain function, the projection weights are
    created (randomly initialised) on every call, so they are not shared or
    trained across calls. For training, move the layers into an
    ``nn.Module``.
    """
    def _linear(in_dim, out_dim, bias=True):
        # Per-node linear map; equivalent to a kernel-size-1 convolution on
        # channels-last input.
        layer = nn.Linear(in_dim, out_dim, bias=bias)
        return layer.to(device=features.device, dtype=features.dtype)

    # F.dropout takes the probability of zeroing an element, so the drop
    # rates are passed as-is (not `1 - rate`).
    if in_drop != 0.0:
        features = F.dropout(features, p=in_drop)

    in_dim = features.shape[-1]
    features_proj = _linear(in_dim, out_sz, bias=False)(features)

    f_1 = _linear(out_sz, 1)(features_proj)
    f_2 = _linear(out_sz, 1)(features_proj)

    # Broadcasting (nodes, 1) + (1, nodes) yields one logit per node pair.
    logits = f_1 + f_2.transpose(-2, -1)
    # NOTE(review): the default negative slope (0.01) is used because the
    # original call passed none -- confirm the intended slope.
    coefs = F.softmax(F.leaky_relu(logits) + bias_mat, dim=-1)

    if coef_drop != 0.0:
        coefs = F.dropout(coefs, p=coef_drop)
    if in_drop != 0.0:
        features_proj = F.dropout(features_proj, p=in_drop)

    vals = torch.matmul(coefs, features_proj)
    # Zero-initialised per-channel bias (the original referenced an
    # undefined `bias`).
    bias = torch.zeros(out_sz, device=features.device, dtype=features.dtype)
    ret = vals + bias

    # Residual connection.
    if residual:
        if features.shape[-1] != ret.shape[-1]:
            ret = ret + _linear(in_dim, ret.shape[-1])(features)
        else:
            # Previously assigned to a dead variable, dropping the skip path.
            ret = ret + features

    if return_coef:
        return activation(ret), coefs
    return activation(ret)