In [1]:
import torch
from torch_geometric.data import InMemoryDataset, Data
import os
import os.path as osp
import numpy as np
from torch import Tensor
from torch_geometric.nn import GCNConv
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch.nn import Linear, Parameter
from torch.nn import ReLU, Sequential
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# 讀取文本檔 nodes&edges
with open('./Synthetic/5000/0.txt', 'r') as f:
    lines = f.readlines()

# 解析邊界資訊
edges = []
for line in lines:
    source_node_index, target_node_index = map(int, line.strip().split())
    edges.append([source_node_index, target_node_index])  
    
# print(edges)
    
# 計算節點數和邊數
num_nodes = max([max(e) for e in edges]) + 1
num_edges = len(edges)
dvnodes = (2*num_edges) / num_nodes
# print(dvnodes)

# 建立邊界矩陣(edge_index)
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
# x = torch.tensor(dvnodes, dtype=torch.long)
# x = torch.randn(num_nodes)
# print(x)

f.close()

In [3]:
# 讀取文本檔 exact BC value，然後把它當成data.x的資料
with open('./Synthetic/5000/0_score.txt', 'r') as f:
    lines = f.readlines()

# 解析節點資訊
features = []
y=[]

for line in lines:
    source_node_index, target_node_index = map(float, line.strip().split())
    features.append([dvnodes,1,1]) 
    y.append(target_node_index)
#     y.append([0])
    #這裡代表每一節點的node feature，作者把[dv, 1, 1]當成initial node features
    #在這裡我把它設定成[取得的exact BC valu, 1, 1]做為每一節點的node features
    
# print(features)
    
# 建立節點特徵矩陣
x = torch.tensor(features, dtype=torch.float)
y = torch.tensor(y, dtype=torch.long)

print(y)

f.close()

tensor([0, 0, 0,  ..., 0, 0, 0])


  y = torch.tensor(y, dtype=torch.long)


In [4]:
# 建立 Data 物件
data = Data(x=x, edge_index=edge_index, y=y)

# 顯示 Data 物件中的資訊
print(data)

Data(x=[5000, 3], edge_index=[2, 19982], y=[5000])


In [5]:
#dataset information
print('nodes:',data.num_nodes)
print('edges:',data.num_edges)
print(f'Average node degree: {(2*data.num_edges) / data.num_nodes:.2f}')
print('edge_index:',data.edge_index)
print('data.x: ',data.x)
print('data.y: ',data.y)

nodes: 5000
edges: 19982
Average node degree: 7.99
edge_index: tensor([[   0,    0,    0,  ..., 4844, 4870, 4937],
        [   4,    5,    8,  ..., 4849, 4928, 4953]])
data.x:  tensor([[7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        ...,
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000]])
data.y:  tensor([0, 0, 0,  ..., 0, 0, 0])


In [6]:
# 劃分數據集
num_nodes = data.num_nodes # 總節點數
perm = torch.randperm(num_nodes)

train_mask = perm < num_nodes * 0.6  # 60% 用來訓練
val_mask = (perm >= num_nodes * 0.6) & (perm < num_nodes * 0.8)  # 20% for valid
test_mask = perm >= num_nodes * 0.8  # 20% for test
# print(type(train_mask))
data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

# train_index = [1, 3, 5, 7, 9] # 訓練節點的索引
# n_nodes = 10 # 總節點數

# train_mask = np.zeros(n_nodes, dtype=bool)
# train_mask[train_index] = True

In [7]:
print(data.train_mask)

tensor([False, False, False,  ...,  True, False,  True])


In [8]:
print(data.val_mask)

tensor([ True, False,  True,  ..., False,  True, False])


In [9]:
print(data.test_mask)

tensor([False,  True, False,  ..., False, False, False])


In [10]:
print(data.edge_index)

tensor([[   0,    0,    0,  ..., 4844, 4870, 4937],
        [   4,    5,    8,  ..., 4849, 4928, 4953]])


In [11]:
print(edge_index.t())

tensor([[   0,    4],
        [   0,    5],
        [   0,    8],
        ...,
        [4844, 4849],
        [4870, 4928],
        [4937, 4953]])


In [12]:
data.num_features

3

In [13]:
data.x

tensor([[7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        ...,
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000],
        [7.9928, 1.0000, 1.0000]])

In [14]:
#parameters setting
learning_rate = 0.0001
embedding_dimension = 128
mini_batch_size = 16
average_node_samping_times = 5
maximum_episodes = 10000
layer_iterations = 5
aux_ori_feature = 128
aux_feat_dim = 4 
final_feature = 1

In [15]:
#建立GCN模型 產生embedding
class GCNMaxPooling(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='max') #使用 Max pooling layer aggregator. 
        self.lin = Linear(in_channels, out_channels)
        self.bias = torch.nn.Parameter(torch.zeros(out_channels))

    def forward(self, x, edge_index):
        # Add self-loops to the adjacency matrix.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Linearly transform node feature matrix.
        x = self.lin(x)

        # Normalize node features.
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        norm = norm.view(-1, 1)

        # Propagate the messages.
        out = self.propagate(edge_index, x=x, norm=norm)

        # Add bias and apply activation function.
        out = out + self.bias
        out = torch.nn.functional.relu(out)

        return out

    def message(self, x_j, norm):
        # Normalize node features.
        return norm * x_j

    def update(self, aggr_out):
        # Max pooling layer aggregator.
        print('aggr_out:',aggr_out)
        return aggr_out


In [16]:
model=GCNMaxPooling(data.num_features, embedding_dimension)
#(每個node特徵維度, 想輸出的embedding dimension)

In [17]:
h = model(data.x, data.edge_index)
print('encoder GCN model output',h)
print(f'Embedding shape: {list(h.shape)}')

aggr_out: tensor([[-4.2260, -0.2578, -0.5104,  ...,  3.0376, -3.0985, -4.3057],
        [-4.2260, -0.2578, -0.5104,  ...,  3.0376, -3.0985, -4.3057],
        [-4.2260, -0.2578, -0.5104,  ...,  3.0376, -3.0985, -4.3057],
        ...,
        [-0.8452, -0.0516, -0.1021,  ...,  0.6075, -0.6197, -0.8611],
        [-0.8452, -0.0516, -0.1021,  ...,  0.6075, -0.6197, -0.8611],
        [-0.8452, -0.0516, -0.1021,  ...,  0.6075, -0.6197, -0.8611]],
       grad_fn=<ScatterReduceBackward0>)
encoder GCN model output tensor([[0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000]],
       grad_fn=<ReluBackward0>)
Embedding shape: [5000, 128]


In [18]:
print(h) #顯示產生出來的embedding

tensor([[0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 3.0376, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.6075, 0.0000, 0.0000]],
       grad_fn=<ReluBackward0>)


In [19]:
#建立two layer MLP當成decoder
class TwoLayerMLP(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(TwoLayerMLP, self).__init__()
        self.linear1 = Linear(in_features, hidden_features)
        self.activation = ReLU()
        self.linear2 = Linear(hidden_features, out_features)
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.activation(out)
        out = self.linear2(out)
        return out

In [20]:
modeltwo = TwoLayerMLP(aux_ori_feature,aux_feat_dim,final_feature)

In [21]:
h2 = modeltwo(h)

In [22]:
#用來看產生出來的ranking score
# x=4500
# print(h2[4900])
# for x in range(5000):
#     print(h2[x])


In [23]:
 print(data.y)
print(type(data.y))

tensor([0, 0, 0,  ..., 0, 0, 0])
<class 'torch.Tensor'>


In [24]:
def train(model, modeltwo, data, optimizer):
    model.train()
    optimizer.zero_grad()
    output = model(data.x, data.edge_index)
#     print(output)
    finaloutput = modeltwo(output)
    print(finaloutput)

#     Finalll=finaloutput.tolist()
#     print(type(Finalll))

#     print('OUT TRAIN MASK',finaloutput[data.train_mask])
#     print('******************')
#     print('Y TRAIN MASK',data.y[data.train_mask])
#     data.y[data.train_mask] = data.y[data.train_mask].squeeze(1)
    loss = criterion(finaloutput[data.train_mask], data.y[data.train_mask])
#     loss = F.nll_loss(finaloutput[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

In [25]:
def test(model, modeltwo, data):
    model.eval()
    output = model(data.x, data.edge_index)
    finaloutput = modeltwo(output)
    pred = finaloutput.argmax(dim=1)
    correct = pred.eq(data.y)
    accuracy = torch.sum(correct[data.test_mask]).item() / data.test_mask.sum().item()
    return accuracy

In [26]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

# model=GCNMaxPooling(data.num_features, embedding_dimension).to(dtype=torch.long, device=device)
# modeltwo = TwoLayerMLP(aux_ori_feature,aux_feat_dim,final_feature).to(dtype=torch.long, device=device)

# model = GCN(input_dim=data.num_features, hidden_dim=16, output_dim=dataset.num_classes)
model=GCNMaxPooling(data.num_features, embedding_dimension).to(device=device)
modeltwo = TwoLayerMLP(aux_ori_feature,aux_feat_dim,final_feature).to(device=device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)

for epoch in range(20): #maximum_episodes = 10000
    loss = train(model, modeltwo, data, optimizer)
    acc = test(model, modeltwo, data)
    print(f"Epoch: {epoch + 1}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

cpu
aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294]],
       grad_fn=<ScatterReduceBackward0>)
tensor([[-0.7383],
        [-0.7383],
        [-0.7383],
        ...,
        [-0.3761],
        [-0.3761],
        [-0.3761]], grad_fn=<AddmmBackward0>)
aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.

aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294]],
       grad_fn=<ScatterReduceBackward0>)
Epoch: 9, Loss: 0.0000, Accuracy: 1.0000
aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294]],
       grad_fn=<ScatterReduceB

aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294]],
       grad_fn=<ScatterReduceBackward0>)
Epoch: 17, Loss: 0.0000, Accuracy: 1.0000
aggr_out: tensor([[ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        [ 1.3092, -1.2458,  4.7707,  ..., -2.3640,  1.5771, -2.6470],
        ...,
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294],
        [ 0.2618, -0.2492,  0.9541,  ..., -0.4728,  0.3154, -0.5294]],
       grad_fn=<ScatterReduce

In [27]:
#下面是建立多個圖檔資料集的class設定

In [28]:
class MyDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(MyDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return ['0.txt', '1.txt','2.txt','3.txt','4.txt','5.txt']

    @property
    def processed_file_names(self):
        return ['data.pt']

    def download(self):
        # 下載數據集，這邊因為已經有數據集了，所以pass
        pass

    def process(self):
        # 讀取txt檔，建立多個graph，然後把他們轉換成Data
        data_list = []
        for i, file in enumerate(self.raw_paths):
            edge_index = []
            node_features = []
            label = []
            
            with open(file, 'r') as f:
                for line in f:
                    if line.startswith('node'):
                        node_id, feature, label_ = line.strip().split('')
                        node_features.append([float(x) for x in feature.split(',')])
                        label.append(int(label_))
                    else:
                        src, dst = line.strip().split('\t')
                        edge_index.append([int(src), int(dst)])
            edge_index = torch.tensor(edge_index).t().contiguous()
            node_features = torch.tensor(node_features, dtype=torch.float)
            label = torch.tensor(label, dtype=torch.long)
            data = Data(x=node_features, edge_index=edge_index, y=label)
            data_list.append(data)
        # 把Data列表轉換成PyTorch Geometric的Datalist
        data, slices = self.collate(data_list)
        # 保存DataList
        torch.save((data, slices), self.processed_paths[0])

        

In [29]:
model = MyDataset(root='./Synthetic/5000')

Processing...
Done!


In [30]:
print(model[0])
print(model[1])
print(model[2])
print(model[3])
print(model[4])
print(model[5])

Data(x=[0], edge_index=[2, 19982], y=[0])
Data(x=[0], edge_index=[2, 19981], y=[0])
Data(x=[0], edge_index=[2, 19980], y=[0])
Data(x=[0], edge_index=[2, 19982], y=[0])
Data(x=[0], edge_index=[2, 19984], y=[0])
Data(x=[0], edge_index=[2, 19981], y=[0])


In [31]:
print(model[0].edge_index)

tensor([[   0,    0,    0,  ..., 4844, 4870, 4937],
        [   4,    5,    8,  ..., 4849, 4928, 4953]])


In [32]:
print(len(model))

6
