# Line Graph for Embedding Edge Representations

## Some Ideas to Explore
- Line graph as an embedding for pre-training, then NCN on the original graph (transfer learning?)
- Line graph as an augmentation for contrastive learning, like LGLP?
- Line graph for edge-feature embedding, but with the adjacency matrix from the original graph
- Non-contrastive learning with two GNNs, one for features and one for structure, then reuse these two GNNs as encoders for the final task

In [1]:
import time
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import networkx as nx
from model_contrastive import Encoder, GRACE, drop_feature
from model import CNLinkPredictor, GCN
from NeighborOverlap import train, test
from ogbdataset import loaddataset
from ogb.linkproppred import PygLinkPropPredDataset, Evaluator
from torch.utils.tensorboard import SummaryWriter
from utils_contrastive import compute_pr, eigenvector_centrality
from torch_geometric.utils import dropout_adj
from torch_geometric.transforms import LineGraph
from torch_geometric.utils.convert import to_networkx

import torch
from sklearn.metrics import roc_auc_score, average_precision_score
from ogb.linkproppred import PygLinkPropPredDataset
import torch_geometric.transforms as T
from torch_sparse import SparseTensor
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import train_test_split_edges, negative_sampling, to_undirected
from torch_geometric.transforms import RandomLinkSplit



In [2]:
def set_seed(seed):
    """Seed the PyTorch (CPU and all CUDA devices) and NumPy random generators.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed)
    for apply_seed in seeders:
        apply_seed(seed)

In [3]:
# Hyperparameter table handed to the training helpers as `hp`
# (e.g. `legacy_train` reads 'batch_size' and 'maskinput').
hp = dict(
    xdp=0.7,
    tdp=0.3,
    pt=0.75,
    gnnedp=0.0,
    preedp=0.4,
    predp=0.05,
    gnndp=0.05,
    probscale=4.3,
    proboffset=2.8,
    alpha=1.0,
    gnnlr=0.0043,
    prelr=0.0024,
    batch_size=1152,
    ln=True,
    lnnn=True,
    epochs=100,
    runs=1,
    hiddim=256,
    mplayers=1,
    testbs=8192,
    maskinput=True,
    jk=True,
    use_xlin=True,
    tailact=True,
)
# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
def legacy_train(epoch, model, predictor, data, split_edge, optimizer, evaluator, hp):
    """Run one call to `train` and print its duration and loss every 10th epoch.

    Args:
        epoch: Current epoch number; a report is printed when it is a
            multiple of 10.
        model, predictor, data, split_edge, optimizer: Forwarded unchanged
            to `train`.
        evaluator: Not used by this function.
        hp: Mapping that provides 'batch_size' and 'maskinput'.
    """
    started = time.time()
    batch_size = hp['batch_size']
    mask_input = hp['maskinput']
    # The trailing `[]` and `None` are positional arguments of `train`
    # (imported from NeighborOverlap); their meaning is not visible here.
    loss = train(model, predictor, data, split_edge, optimizer,
                 batch_size, mask_input, [], None)
    if epoch % 10 != 0:
        return
    print(f"10 train time {time.time()-started:.2f} s, loss {loss:.4f}", flush=True)

In [5]:
def legacy_test(run, epoch, model, predictor, data, split_edge, evaluator, bestscore, writer, hp):
    """Evaluate the model, log every metric, and track the best validation scores.

    Calls `test`, writes the train/val/test value of each metric to `writer`,
    prints them, and updates `bestscore` whenever the validation value of a
    metric improves.

    Args:
        run: Zero-based run index (printed as ``run + 1``).
        epoch: Current epoch, used as the logging step.
        model, predictor, data, split_edge, evaluator: Forwarded to `test`.
        bestscore: Dict mapping metric name to ``[train, valid, test]`` of the
            best epoch so far, or ``None`` on the first call.
        writer: Object exposing ``add_scalars(tag, scalars, step)``.
        hp: Hyperparameter mapping; not read by this function.

    Returns:
        The updated best-score dict. When `bestscore` was ``None`` a new dict
        is created, so callers must use the returned value to keep tracking
        across epochs. A dict passed in is also updated in place.
    """
    t1 = time.time()
    # `test` returns a second value that is not needed here.
    results, _ = test(model, predictor, data, split_edge, evaluator,
                      8192, False)
    print(f"test time {time.time()-t1:.2f} s")
    if bestscore is None:
        bestscore = {key: list(results[key]) for key in results}
    for key, result in results.items():
        writer.add_scalars(f"{key}_{run}", {
            "trn": result[0],
            "val": result[1],
            "tst": result[2]
        }, epoch)
        train_hits, valid_hits, test_hits = result
        # A metric missing from a caller-supplied dict is treated as a new best
        # instead of raising KeyError.
        if key not in bestscore or valid_hits > bestscore[key][1]:
            bestscore[key] = list(result)
        print(key)
        print(f'Run: {run + 1:02d}, '
              f'Epoch: {epoch:02d}, '
              f'Train: {100 * train_hits:.2f}%, '
              f'Valid: {100 * valid_hits:.2f}%, '
              f'Test: {100 * test_hits:.2f}%')
    print('---', flush=True)
    return bestscore

In [6]:
def pretrain_grace(model, data):
    """Pre-train `model` by contrasting two randomly augmented views of `data`.

    Each epoch builds two views (edge dropout via `dropout_adj`, feature
    dropout via `drop_feature`), encodes both with `model`, and minimises
    `model.loss(z1, z2)` with Adam. Progress is printed every 100 epochs and
    the total time once at the end.

    Args:
        model: Module callable as ``model(x, edge_index)`` that also exposes
            ``loss(z1, z2)``.
        data: Graph object providing ``x`` and ``edge_index``.
    """
    # Only the learning rate, weight decay, epoch count and the four drop
    # rates are read below; the remaining entries are not used here.
    param = {
        'learning_rate': 0.01,
        'num_hidden': 256,
        'num_proj_hidden': 32,
        'activation': 'prelu',
        'base_model': 'GCNConv',
        'num_layers': 2,
        'drop_edge_rate_1': 0.3,
        'drop_edge_rate_2': 0.4,
        'drop_feature_rate_1': 0.1,
        'drop_feature_rate_2': 0.0,
        'tau': 0.4,
        'num_epochs': 3000,
        'weight_decay': 1e-5,
        'drop_scheme': 'degree',
    }
    edge_rates = (param['drop_edge_rate_1'], param['drop_edge_rate_2'])
    feature_rates = (param['drop_feature_rate_1'], param['drop_feature_rate_2'])
    total_epochs = param['num_epochs']

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=param['learning_rate'],
                                 weight_decay=param['weight_decay'])
    started = time.time()
    for epoch in range(1, total_epochs + 1):
        model.train()
        optimizer.zero_grad()

        # Random augmentations are drawn in a fixed order: both edge dropouts
        # first, then both feature dropouts.
        edges_a, edges_b = (dropout_adj(data.edge_index, p=rate)[0]
                            for rate in edge_rates)
        feats_a, feats_b = (drop_feature(data.x, rate)
                            for rate in feature_rates)

        view_a = model(feats_a, edges_a)
        view_b = model(feats_b, edges_b)

        loss = model.loss(view_a, view_b)
        loss.backward()
        optimizer.step()
        if epoch % 100 == 0:
            print(f'(T) | Epoch={epoch:03d}, loss={loss:.4f}')
    print(f"pretrain time {time.time()-started:.2f} s, loss {loss:.4f}", flush=True)

In [12]:
def loaddataset_line(name):
    """Load a Planetoid dataset, convert it to its line graph, and split it.

    Per-edge features are built by summing the node features of each edge's
    two endpoints and stored as `x` on a clone of the graph. The `LineGraph`
    transform is applied to that clone, the result is moved to `device`, and
    it is passed to `loaddataset` wrapped in a one-element list.

    Args:
        name: One of "Cora", "Citeseer" or "Pubmed".

    Returns:
        The ``(data, split_edge)`` pair returned by `loaddataset`.

    Raises:
        ValueError: If `name` is not a supported dataset name. Previously this
            failed later with an UnboundLocalError.
    """
    supported = ("Cora", "Citeseer", "Pubmed")
    if name not in supported:
        raise ValueError(
            f"Unsupported dataset {name!r}; expected one of {supported}"
        )
    dataset = Planetoid(root="dataset", name=name)
    data = dataset[0]
    print(data)
    src_nodes = data.edge_index[0]
    target_nodes = data.edge_index[1]
    # Aggregate node attributes: each edge gets the sum of its endpoint features.
    edge_attr = data.x[src_nodes] + data.x[target_nodes]
    data_c = data.clone()
    # NOTE(review): the per-edge features are stored in `x`, not `edge_attr`.
    # The LineGraph transform appears to derive the line graph's node features
    # from `edge_attr` -- confirm the resulting `x` has one row per line-graph node.
    data_c.x = edge_attr
    line = LineGraph()(data_c)
    line = line.to(device)
    dataset = [line]
    data, split_edge = loaddataset(dataset, False)
    return data, split_edge