In [2]:
import matplotlib.pyplot as plt
import numpy as np
import torch

import cxgnncomp as cxgc

def prepare_data():
    """Load the full "arxiv" graph through ``cxgc.prepare_data_full_graph``.

    The loader is called with a feature length of 256, a single head and
    ``need_edge_index=True``.

    Returns:
        The tuple ``(x, ptr, idx, b, num_head, edge_index)``: the five values
        produced by the loader, with the head count used for the request
        inserted before ``edge_index``.
        NOTE(review): the exact meaning/shape of ``x``, ``ptr``, ``idx`` and
        ``b`` is defined by cxgnncomp and is not visible here -- confirm there.
    """
    dataset_name = "arxiv"
    feature_len = 256
    num_head = 1
    loaded = cxgc.prepare_data_full_graph(
        dataset_name,
        feat_len=feature_len,
        num_head=num_head,
        need_edge_index=True,
    )
    x, ptr, idx, b, edge_index = loaded
    return x, ptr, idx, b, num_head, edge_index


# Load the graph and keep a CPU copy of the edge list; the partitioning helpers
# further down iterate over it from Python.
x, ptr, idx, b, num_head, edge_index = prepare_data()
edge_index = edge_index.cpu()
# reorder: relabel both endpoints of every edge through the int64 mapping read
# from disk.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this file exists. Consider moving it to a configurable location.
reorder = torch.from_numpy(np.fromfile(
    "/home/huangkz/repos/rabbit_order/demo/reorder_arxiv.dat", dtype=np.int64))
edge_index = reorder[edge_index]
# Synthetic relation label per edge. The number of relations is derived from
# the probability table so the two can never disagree, and the generator is
# seeded so a Restart & Run All reproduces the same labels (and therefore the
# same task files).
edge_type_prob = [0.6, 0.1, 0.1, 0.1, 0.1]
num_rel = len(edge_type_prob)
rng = np.random.default_rng(0)
edge_type = rng.choice(num_rel, edge_index.shape[1], p=edge_type_prob)

Loading full graph structure... dataset=arxiv feature length=256 num_head=1 undirected=True
After loading full graph structure...
num_edge 2332486 num_center 169343


In [4]:
import torch

def vertex_centric(edge_index_new, edge_type):
    """Write a vertex-centric task assignment to ``vertex_centric.txt``.

    Edges are ordered by destination and every distinct destination receives
    its own task id, numbered from 0 in ascending destination order. One line
    is written per edge as ``src dst taskid type``. An empty edge list yields
    an empty file.

    Args:
        edge_index_new: integer tensor indexed as ``[2, num_edge]``; row 0 is
            read as the edge source and row 1 as the edge destination.
        edge_type: per-edge labels; must support indexing with a tensor of
            positions and ``tolist()`` (a NumPy array or a tensor both do).
    """
    num_edge = edge_index_new.shape[1]
    # A stable sort keeps edges that share a destination in their input order,
    # so the file content is deterministic.
    _, order = torch.sort(edge_index_new[1], stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]

    # Convert once to plain Python values instead of indexing tensors per edge.
    srcs = edge_index_new[0].tolist()
    dsts = edge_index_new[1].tolist()
    types = edge_type.tolist()

    taskid = 0
    with open("vertex_centric.txt", "w") as f:
        for i in range(num_edge):
            # A change of destination starts the next task.
            if i > 0 and dsts[i] != dsts[i - 1]:
                taskid += 1
            f.write(f"{srcs[i]} {dsts[i]} {taskid} {types[i]}\n")

def by_degree(edge_index_new, edge_type):
    """Write a degree-bucketed task assignment to ``edge_by_degree.txt``.

    The in-degree of every destination is computed with ``torch.bincount`` and
    printed. Degrees are then walked in ascending order; all destinations of a
    degree are added to the current batch, and the batch is flushed as one task
    once it holds at least 20 destinations or at least 160 edges (or when the
    largest degree has been reached). One line is written per edge as
    ``src dst taskid type``.

    Edges are sorted by the degree of their destination in *ascending* order so
    that the slice ``[edge_ptr, edge_ptr + batch_edges)`` taken for a batch
    really contains the edges of the degrees accumulated in that batch. The
    sort is stable, so edges of equal degree keep their input order.

    Args:
        edge_index_new: integer tensor indexed as ``[2, num_edge]``; row 0 is
            read as the edge source and row 1 as the edge destination.
        edge_type: per-edge labels; must support indexing with a tensor of
            positions and ``tolist()``.
    """
    num_center_in_batch = 20
    num_neighbor_in_batch = 160

    deg = torch.bincount(edge_index_new[1])
    print(deg)
    edge_deg = deg[edge_index_new[1]]
    # sort by degree, lowest first, matching the ascending bucket walk below
    _, order = torch.sort(edge_deg, stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]

    srcs = edge_index_new[0].tolist()
    dsts = edge_index_new[1].tolist()
    types = edge_type.tolist()

    # deg_count[d] == number of destinations whose in-degree is exactly d.
    deg_count = torch.bincount(deg).tolist()
    last_deg = len(deg_count) - 1

    accumulated_dst = 0
    accumulated_src = 0
    taskid = 0
    edge_ptr = 0
    # The context manager guarantees the file is flushed and closed.
    with open("edge_by_degree.txt", "w") as f:
        for d, num in enumerate(deg_count):
            # Degree 0 contributes no edges and is never counted as a center.
            if num > 0 and d > 0:
                accumulated_dst += num
                accumulated_src += num * d
            batch_full = (accumulated_dst >= num_center_in_batch
                          or accumulated_src >= num_neighbor_in_batch)
            if batch_full or (d == last_deg and accumulated_dst > 0):
                for i in range(edge_ptr, edge_ptr + accumulated_src):
                    f.write(f"{srcs[i]} {dsts[i]} {taskid} {types[i]}\n")
                edge_ptr += accumulated_src
                accumulated_dst = 0
                accumulated_src = 0
                taskid += 1


def by_src(edge_index_new, edge_type, num_rel=5):
    """Write a per-relation, source-grouped task assignment to ``edge_by_src.txt``.

    Edges are ordered by source. For each relation ``t`` in ``range(num_rel)``
    a new task is started, the edges of that relation are visited in source
    order, and a further task is started every 16th distinct source. One line
    is written per edge as ``src dst taskid type``; lines are grouped by
    relation. Edges whose type is not in ``range(num_rel)`` are not written.

    The "current source" is tracked per relation (it starts at the first edge
    of that relation), so the grouping of one relation does not depend on where
    the previous relation happened to end.

    Args:
        edge_index_new: integer tensor indexed as ``[2, num_edge]``; row 0 is
            read as the edge source and row 1 as the edge destination.
        edge_type: per-edge labels; must support indexing with a tensor of
            positions and ``tolist()``.
        num_rel: number of relation labels to emit (labels ``0..num_rel-1``).
    """
    srcs_per_task = 16

    # sort by src; stable so edges of one source keep their input order
    _, order = torch.sort(edge_index_new[0], stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]

    srcs = edge_index_new[0].tolist()
    dsts = edge_index_new[1].tolist()
    types = edge_type.tolist()

    taskid = -1
    with open("edge_by_src.txt", "w") as f:
        for t in range(num_rel):
            # Every relation opens a fresh task, even if it has no edges.
            taskid += 1
            center = None
            cnt = 0
            for s, d, ty in zip(srcs, dsts, types):
                if ty != t:
                    continue
                if center is None:
                    # First edge of this relation defines the first source.
                    center = s
                elif s != center:
                    cnt += 1
                    center = s
                    if cnt % srcs_per_task == 0:
                        taskid += 1
                f.write(f"{s} {d} {taskid} {ty}\n")


def by_src2(edge_index_new, edge_type):
    """Write a source-grouped task assignment to ``edge_by_src_gat.txt``.

    Edges are ordered by source. A new task starts whenever the source changes
    and whenever the current task already holds 8 edges, so every task contains
    edges of a single source and at most 8 of them. One line is written per
    edge as ``src dst taskid type``. An empty edge list yields an empty file.

    Args:
        edge_index_new: integer tensor indexed as ``[2, num_edge]``; row 0 is
            read as the edge source and row 1 as the edge destination.
        edge_type: per-edge labels; must support indexing with a tensor of
            positions and ``tolist()``.
    """
    max_edges_per_task = 8

    # sort by src; stable so edges of one source keep their input order
    _, order = torch.sort(edge_index_new[0], stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]

    srcs = edge_index_new[0].tolist()
    dsts = edge_index_new[1].tolist()
    types = edge_type.tolist()

    taskid = 0
    edges_in_task = 0
    prev_src = None
    with open("edge_by_src_gat.txt", "w") as f:
        for s, d, ty in zip(srcs, dsts, types):
            # The per-task edge counter restarts with every new task, including
            # the one opened by a change of source.
            if prev_src is not None and (
                    s != prev_src or edges_in_task == max_edges_per_task):
                taskid += 1
                edges_in_task = 0
            prev_src = s
            edges_in_task += 1
            f.write(f"{s} {d} {taskid} {ty}\n")


def neighbor_group(edge_index_new, edge_type, limit=32):
    """Write a neighbor-group task assignment to ``edge_neighbor.txt``.

    Edges are ordered by destination and, within a destination, by source. The
    edges of each destination are cut into consecutive groups of at most
    ``limit`` edges and every group becomes one task. Task ids are then
    shuffled with ``np.random.permutation`` before one line per edge is written
    as ``src dst taskid type``. An empty edge list yields an empty file.

    Group sizes are counted per edge, so no group can grow beyond ``limit``
    regardless of which source ids occur in it.

    Args:
        edge_index_new: integer tensor indexed as ``[2, num_edge]``; row 0 is
            read as the edge source and row 1 as the edge destination.
        edge_type: per-edge labels; must support indexing with a tensor of
            positions and ``tolist()``.
        limit: maximum number of edges per task; must be at least 1.

    Raises:
        ValueError: if ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")
    num_edge = edge_index_new.shape[1]
    # sort by src
    _, order = torch.sort(edge_index_new[0], stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]
    # sort by dst; stable, so the source order is kept inside each destination
    _, order = torch.sort(edge_index_new[1], stable=True)
    edge_index_new = edge_index_new[:, order]
    edge_type = edge_type[order]

    srcs = edge_index_new[0].tolist()
    dsts = edge_index_new[1].tolist()
    types = edge_type.tolist()

    taskids = []
    taskid = 0
    edges_in_group = 0
    for i in range(num_edge):
        # Start a new group on a new destination or when the group is full.
        if i > 0 and (dsts[i] != dsts[i - 1] or edges_in_group == limit):
            taskid += 1
            edges_in_group = 0
        edges_in_group += 1
        taskids.append(taskid)

    if taskids:
        # Randomly relabel the tasks; uses NumPy's global random state.
        perm = np.random.permutation(taskid + 1)
        taskids = [perm[t] for t in taskids]

    with open("edge_neighbor.txt", "w") as f:
        for i in range(num_edge):
            f.write(f"{srcs[i]} {dsts[i]} {taskids[i]} {types[i]}\n")


# print(edge_index.shape)
# Keep only the edges whose two endpoint ids are both below `limit`.
limit = 500
keep = (edge_index[0] < limit) & (edge_index[1] < limit)
edge_index_new = edge_index[:, keep]
edge_type_new = edge_type[keep]

# Produce one task-assignment file per strategy, in this fixed order.
for write_tasks in (by_src, neighbor_group, vertex_centric, by_degree, by_src2):
    write_tasks(edge_index_new, edge_type_new)

# Occurrence count of every id in row 0 of the kept edges, largest first.
# NOTE(review): `sorted` rebinds the Python builtin for the rest of the
# notebook; the name is kept because later cells may rely on it.
deg = torch.bincount(edge_index_new[0])
sorted, indices = torch.sort(deg, descending=True)
print(sorted[:])




# print(edge_index_new.shape)
# plt.figure(figsize=(3, 3))
# plt.scatter(edge_index_new[0], edge_index_new[1], s=1)

# for i in range(limit):
#     edge = edge_index_new[:, edge_index_new[1] == i]
# plt.scatter(edge[0], edge[1], s=5)


tensor([ 1,  3,  8,  3,  4,  1,  4,  2,  5,  1,  7,  3,  5,  4,  6,  4,  4,  5,
         2,  5,  3,  6,  8,  1,  8,  9,  3,  7,  9,  8,  4,  9, 11,  6,  9, 11,
        12,  7,  2,  2,  2,  1,  1,  5,  2,  1,  5,  3,  2,  2,  1,  8,  4,  4,
         1,  8,  1,  2,  1,  5,  3,  1,  3,  1,  1, 11,  3,  2,  1,  5,  3,  5,
         2,  1,  2,  6,  6,  1,  1,  8,  1,  9,  1,  5,  2,  5,  1,  2,  2,  7,
         3,  4,  6,  4,  3,  5,  3,  3,  1,  6,  2,  3,  1, 17,  2,  1,  4,  2,
         2,  2,  2,  1,  2,  2,  1,  1, 16,  1,  1,  4,  2,  5,  2,  2,  2,  5,
         2,  2,  2,  2,  1,  1,  1, 14,  0,  1,  2,  1,  1,  4,  2,  1,  2,  1,
         1,  2,  1,  7,  2,  2,  1,  3,  1,  4,  3,  1,  4,  5,  2,  7,  1,  5,
         6,  2,  5,  9,  5,  2,  6, 12,  2,  2,  3,  1,  9,  6,  2,  6,  3,  2,
         6,  1,  1,  3,  2,  3,  2,  3,  8,  2,  2,  1,  1, 19,  2,  1,  2,  3,
         1,  2,  1,  3,  9,  1,  4,  3,  2,  2,  2,  8,  3,  2,  3,  4,  3,  1,
         5,  1,  5,  2,  2,  2,  1,  3, 