In [1]:
import numpy as np
import networkx as nx
import pickle as pk
import torch
import matplotlib.pyplot as plt
from stellargraph.data import UnsupervisedSampler
from tqdm import tqdm

from stellargraph import StellarGraph
from stellargraph.mapper import GraphSAGENodeGenerator,GraphSAGELinkGenerator
from stellargraph.data import EdgeSplitter
from stellargraph.layer import GraphSAGE, HinSAGE, link_classification

from tensorflow import keras
from sklearn import preprocessing, feature_extraction, model_selection

from stellargraph import globalvar
from stellargraph import datasets
from IPython.display import display, HTML

import logging
import time
from collections import defaultdict
import os
import scipy.sparse as sp
import torch
from torch.nn import functional as F

datasetname = 'SH_S'#有数据集SH_S、SH_L、MV
threshold1 = 0.5
threshold2 = 0.5
batch_size = 128
epochs = 20
num_samples = [30, 20]
layer_sizes = [64, 64]

best_suc = [0]*21
best_pre = [0]*21
best_recall = [0]*21
pro_best_suc = [0]*21
pro_best_pre = [0]*21
pro_best_recall = [0]*21
test_config = 'C2.1'

def getg(data,minsup=1):
    logger.warning('minsup : %d' % minsup)
    g = nx.Graph()
    n = len(data.item_method_id)
    # 顶点
    point = []
    for i in range(n):
        point.append(i)
    g.add_nodes_from(point)
    # 边权重
    edglist = []
    edges = set()
    for user, items in tqdm(data.invocation_mx.items()):
        for i in range(len(items)):
            for j in range(i+1,len(items)):
                edges.add((items[i],items[j]))
    
    for edg in tqdm(edges):
        weight = float(data.adj[edg[0],edg[1]])
        if weight>=minsup:
            edglist.append((edg[0],edg[1],weight))
        #edglist.append((edg[0],edg[1]))

    g.add_weighted_edges_from(edglist)
    #g.add_edges_from(edglist)
    return g

def to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def build_my_new_adj_matrix(data,train_dict):
    n = len(data.item_method_id)
    A = sp.dok_matrix((n, n), dtype=np.float32)
    FD = defaultdict(int)
    for user, items in tqdm(train_dict.items()):
        for i in range(len(items)):
            FD[items[i]] += 1
            for j in range(i+1,len(items)):
                if A[items[i],items[j]] == 0:
                    A[items[i],items[j]] = A[items[j],items[i]] = len(data.invocation_mx.items())
                else:
                    A[items[i], items[j]] = A[items[j], items[i]] = A[items[j], items[i]] + len(data.invocation_mx.items())
    print('build_my_new_adj_matrix finish')
    data.FD = FD
    data.adj = to_torch_sparse_tensor(A)
    

def load_mydata(dataset_name):
    name = './tmp/%s-mydata.pk' % dataset_name
    if not os.path.exists(name):
        print('no file.')
    with open(name, 'rb') as f:
        data = pk.load(f)
        print('load dataset from disk.')
    #data.adj = to_torch_sparse_tensor(data.adj)
    return data

def get_node_embeddings(data,minsup):
    t1 = time.time()
    g = getg(data,minsup)
    t2 = time.time()
    logger.warning('build time : %d' % (t2-t1))
    logger.info('================================================')
    g_feature_attr = g.copy()
    for node_id, node_data in g_feature_attr.nodes(data=True):
        node_data["feature"] = data.item_pre_emb[node_id].numpy()

    G = StellarGraph.from_networkx(
        g_feature_attr, node_features="feature", node_type_default="API", edge_type_default="Attribute"
    )
    edge_splitter_test = EdgeSplitter(G)

    # Randomly sample a fraction p=0.1 of all positive links, and same number of negative links, from G, and obtain the
    # reduced graph G_test with the sampled links removed:
    G_test, edge_ids_test, edge_labels_test = edge_splitter_test.train_test_split(
        p=0.1, method="global", keep_connected=True
    )
    # Define an edge splitter on the reduced graph G_test:
    edge_splitter_train = EdgeSplitter(G_test)

    # Randomly sample a fraction p=0.1 of all positive links, and same number of negative links, from G_test, and obtain the
    # reduced graph G_train with the sampled links removed:
    G_train, edge_ids_train, edge_labels_train = edge_splitter_train.train_test_split(
        p=0.1, method="global", keep_connected=True
    )

    train_gen = GraphSAGELinkGenerator(G_train, batch_size, num_samples)
    train_flow = train_gen.flow(edge_ids_train, edge_labels_train, shuffle=True)
    test_gen = GraphSAGELinkGenerator(G_test, batch_size, num_samples)
    test_flow = test_gen.flow(edge_ids_test, edge_labels_test)
    graphsage = GraphSAGE(
        layer_sizes=layer_sizes, generator=train_gen, bias=True, dropout=0.3
    )
    x_inp, x_out = graphsage.in_out_tensors()
    prediction = link_classification(
        output_dim=1, output_act="relu", edge_embedding_method="ip"
    )(x_out)
    model = keras.Model(inputs=x_inp, outputs=prediction)

    model.compile(
        optimizer=keras.optimizers.Adam(lr=1e-3),
        loss=keras.losses.binary_crossentropy,
        metrics=[keras.metrics.binary_accuracy],
    )
    history = model.fit(train_flow, epochs=epochs, validation_data=test_flow, verbose=2)
    x_inp_src = x_inp[0::2]
    x_out_src = x_out[0]
    embedding_model = keras.Model(inputs=x_inp_src, outputs=x_out_src)
    node_ids = G_train.nodes()
    node_gen = GraphSAGENodeGenerator(G_train, batch_size, num_samples).flow(node_ids)
    node_embeddings = embedding_model.predict(node_gen, workers=4, verbose=1)
    return node_embeddings

#threshold1取值（0，1）表示考虑节点相似特征的阈值，值越大候选特征节点越少
#threshold2取值（0，1）表示节点属性特征的重要性，越小越不重要
def build_new_relation(ratings,threshold1 = 0.8,threshold2 = 0.001):
    dataset = load_mydata(datasetname)
    n = len(ratings)
    for i in tqdm(range(n)):
        for j in range(i+1,n):
            simily = float(ratings[i][j])
            if simily >= threshold1:
                dataset.adj[i,j] = threshold2*simily
    return dataset
    
def get_my_top_items(tensor):
    item_dict = {}
    for i in tqdm(range(len(tensor))):
        if tensor[i].item() !=0 :
            item_dict[i] = tensor[i].item()
    #print('get_my_top_items==>item_dict',item_dict)
    top_items = [item[0] for item in sorted(item_dict.items(),key=lambda item:item[1],reverse=True)]
    #print('get_my_top_items==>top_items',top_items)
    return top_items[:21]

def get_my_top_items2(data,adj,Q,ratings,a,b):
    #链路预测
    diag = torch.diag(ratings) #获取对角为一维向量
    diag_embed = torch.diag_embed(diag)  # 由diag恢复为对角矩阵
    link_embed = ratings - diag_embed
    rowsoftmax = F.softmax(link_embed,dim=1)
    link_q1 = torch.zeros(size=[len(rowsoftmax[0])])
    for q in Q:
        link_q1 = link_q1 + rowsoftmax[q]
        
    #贝叶斯预测
    M = len(data.invocation_mx.items())
    D = set()
    FD = data.FD
    
    for q in Q:
        tensor = adj[q]
        for i in range(len(tensor)):
            if tensor[i].item() != 0:
                D.add(i)

    link_q2 = torch.zeros(size=[len(adj)])
    print(len(link_q2))
    for d in D:
        fd = FD[d]
        fdq = 1
        for q in Q:
            tensor = adj[q]
            fdq = fdq*(tensor[d].item()*1.0/fd)
        #利用贝叶斯求得d被预测的概率
        p2 = fdq*fd*1.0/M
        link_q2[d] = p2
    
    link_q1 = F.normalize(link_q1, p=2, dim=0)
    link_q2 = F.normalize(link_q2, p=2, dim=0)
    link_q = link_q1*a+link_q2*b
    arr,top_items = torch.sort(link_q,descending=True)
    top_items = top_items[:21]
    #top_items = [item[0] for item in sorted(item_dict.items(), key=lambda item: item[1], reverse=True)]
    # print('get_my_top_items==>top_items',top_items)
    return top_items.numpy()

def myeval(dataset,ratings,a,b):
    test_set = dataset.test_dict
    logger.info('test start. test set size: %d' % len(test_set))
    t1 = time.time()
    users = np.asarray(list(test_set.keys()))  # 训练集的方法编号数组

    top_items = []
    used_items = []

    for userid in tqdm(users):
        used_items.append(set(dataset.train_dict[userid]))
        #print(dataset.train_dict[userid],dataset.train_dict[userid][0])
        #top_items.append(get_my_top_items(dataset.adj[dataset.train_dict[userid][0]]))
        top_items.append(get_my_top_items2(dataset,dataset.adj,dataset.train_dict[userid],ratings,a,b))

    #print('myeval2=>top_items',top_items)
    #print('myeval2=>used_items', used_items)

    items = []
    for i, item in enumerate(top_items):  # 第i个测试方法推荐的API列表item
        # if i<=20:
        rec_item = [tid for tid in item if tid not in used_items[i]]
        # print(rec_item)
        items.append(rec_item[:20])

    def getMAP(N):
        qarr = []
        for i, uid in enumerate(users):
            r = 0
            drarr = []
            for k in range(1, N+1):
                intersect = set(items[i][:k]) & set(test_set[uid])
                p = len(intersect) / k
                newr = len(intersect) / len(set(test_set[uid]))
                dr = (newr-r)*p
                drarr.append(dr)
                r = newr
            qarr.append(np.sum(drarr))
        return np.sum(qarr)/len(qarr)
     
    def res_at_k(k):
        suc_methods = []
        precisions = []
        recalls = []
        proj_suc = defaultdict(list)
        proj_pre = defaultdict(list)
        proj_recall = defaultdict(list)

        for i, uid in enumerate(users):
            pid = dataset.test_user2proj[uid]
            intersect = set(items[i][:k]) & set(test_set[uid])
            if len(intersect) > 0:
                suc_methods.append(uid)
                proj_suc[pid].append(1)
            else:
                logger.debug('failed uid %d' % uid)
                logger.debug('GT:{}, REC:{}'.format(test_set[uid], items[i]))
                proj_suc[pid].append(0)
            p = len(intersect) / k
            r = len(intersect) / len(set(test_set[uid]))
            precisions.append(p)
            recalls.append(r)
            proj_pre[pid].append(p)
            proj_recall[pid].append(r)
        suc_rate = len(suc_methods) / len(users)
        logger.info('----------------------result@%d--------------------------' % k)
        logger.info('success rate at method level %f' % (suc_rate))
        logger.info('mean precision:%f, mean recall:%f' % (np.mean(precisions), np.mean(recalls)))

        suc_project = [np.mean(val) for val in proj_suc.values()]
        pres = [np.mean(val) for val in proj_pre.values()]
        recs = [np.mean(val) for val in proj_recall.values()]
        logger.info('**********************************************************')
        logger.info('success rate at project level %f' % (np.mean(np.mean(suc_project))))
        logger.info('mean precision:%f, mean recall:%f' % (np.mean(pres), np.mean(recs)))
        return suc_rate, np.mean(precisions), np.mean(recalls),np.mean(np.mean(suc_project)),np.mean(pres), np.mean(recs)

    t2 = time.time()
    logger.info('test end time: {}s'.format(t2 - t1))
    for i in range(1, 21):
        suc, pre, rec, pro_suc, pro_pre, pro_rec = res_at_k(i)
        if suc > best_suc[i]:
            best_suc[i] = suc
        if pre > best_pre[i]:
            best_pre[i] = pre
        if rec > best_recall[i]:
            best_recall[i] = rec
        logger.warning('method level => top %d : best suc %f, best pre %f,  best recall %f' % (i, best_suc[i], best_pre[i], best_recall[i]))

        if pro_suc > pro_best_suc[i]:
            pro_best_suc[i] = pro_suc
        if pro_pre > pro_best_pre[i]:
            pro_best_pre[i] = pro_pre
        if pro_rec > pro_best_recall[i]:
            pro_best_recall[i] = pro_rec
        logger.warning('project level => top %d : best suc %f, best pre %f,  best recall %f' % (i, pro_best_suc[i], pro_best_pre[i], pro_best_recall[i]))

    logger.info('MAP: %f' % (getMAP(20)))

#载入数据并划分数据集
data = load_mydata(datasetname)
data.split_data(test_config)
build_my_new_adj_matrix(data,data.train_dict)

100%|██████████| 4442/4442 [00:00<00:00, 175147.11it/s]
  2%|▏         | 106/4442 [00:00<00:04, 960.10it/s]

load dataset from disk.
total user methods:4442, test_proj:{129, 130, 4, 5, 7, 12, 16, 145, 18, 146, 150, 22, 152, 154, 159, 33, 37, 170, 44, 45, 177, 179, 180, 182, 183, 55, 185, 60, 61, 189, 74, 80, 83, 98, 106, 107, 111, 122, 123, 125}
test set methods count:175, invocations:1407
load train datas ...
train set methods count:4442, invocation: 111099


100%|██████████| 4442/4442 [00:05<00:00, 765.20it/s] 


build_my_new_adj_matrix finish


In [3]:
now = time.strftime("%Y-%m-%d-%H_%M_%S",time.localtime(time.time())) 

logging.basicConfig(format='%(asctime)s-%(levelname)s:%(message)s',
                    filename='./log/GLAPI-GraphSAGE_SHS_minsup_'+'_'+now+'.log',
                    filemode='a', level=logging.INFO)
logger = logging.getLogger(__name__)

In [4]:
for i in range(100000,1417000,100000):
    minsup=i
    t1 = time.time()
    node_embeddings = get_node_embeddings(data,1)
    t2 = time.time()
    logger.warning('train time : %d' % (t2-t1))
    logger.info('================================================')
    logger.info('minsup:%f;'%(minsup))
    #得到节点嵌入和链接相似性
    api_embeddings = torch.from_numpy(node_embeddings)
    ratings = api_embeddings.mm(api_embeddings.transpose(0, 1))
    myeval(data,ratings,threshold1,threshold2)

100%|██████████| 4442/4442 [00:00<00:00, 107395.61it/s]
100%|██████████| 68050/68050 [00:27<00:00, 2485.31it/s]


** Sampled 22 positive and 22 negative edges. **
** Sampled 20 positive and 20 negative edges. **
link_classification: using 'ip' method to combine node embeddings into edge embeddings


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/20
1/1 - 2s - loss: 0.5401 - binary_accuracy: 0.6750 - val_loss: 1.0011 - val_binary_accuracy: 0.5455
Epoch 2/20
1/1 - 0s - loss: 0.4736 - binary_accuracy: 0.8000 - val_loss: 0.9437 - val_binary_accuracy: 0.5455
Epoch 3/20
1/1 - 0s - loss: 0.3924 - binary_accuracy: 0.8250 - val_loss: 0.8897 - val_binary_accuracy: 0.5909
Epoch 4/20
1/1 - 0s - loss: 0.3889 - binary_accuracy: 0.8750 - val_loss: 0.8375 - val_binary_accuracy: 0.6136
Epoch 5/20
1/1 - 0s - loss: 0.3794 - binary_accuracy: 0.8500 - val_loss: 0.7900 - val_binary_accuracy: 0.6136
Epoch 6/20
1/1 - 0s - loss: 0.2368 - binary_accuracy: 0.9500 - val_loss: 0.7449 - val_binary_accuracy: 0.6364
Epoch 7/20
1/1 - 0s - loss: 0.2759 - binary_accuracy: 0.9250 - val_loss: 0.7039 - val_binary_accuracy: 0.6364
Epoch 8/20
1/1 - 0s - loss: 0.3654 - binary_accuracy: 0.8500 - val_loss: 0.6676 - val_binary_accuracy: 0.6364
Epoch 9/20
1/1 - 0s - loss: 0.2543 - binary_accuracy: 0.9250 - val_loss: 0.6320 - val_binary_accuracy: 0.6364
Epoch 10/2

  0%|          | 0/175 [00:00<?, ?it/s]

5351


  1%|          | 1/175 [00:00<00:27,  6.25it/s]

5351


  2%|▏         | 3/175 [00:00<00:44,  3.91it/s]

5351


  2%|▏         | 4/175 [00:00<00:33,  5.10it/s]

5351


  3%|▎         | 5/175 [00:01<00:27,  6.13it/s]

5351


  3%|▎         | 6/175 [00:01<00:26,  6.47it/s]

5351
5351


  5%|▍         | 8/175 [00:02<00:52,  3.20it/s]

5351
5351


  5%|▌         | 9/175 [00:02<00:59,  2.80it/s]

5351


  6%|▋         | 11/175 [00:03<00:51,  3.20it/s]

5351
5351


  8%|▊         | 14/175 [00:03<00:31,  5.12it/s]

5351
5351
5351


 10%|▉         | 17/175 [00:04<00:32,  4.86it/s]

5351
5351


 10%|█         | 18/175 [00:04<00:48,  3.24it/s]

5351
5351


 11%|█         | 19/175 [00:05<00:58,  2.67it/s]

5351


 11%|█▏        | 20/175 [00:05<01:04,  2.39it/s]

5351
5351


 14%|█▎        | 24/175 [00:06<00:39,  3.86it/s]

5351
5351


 14%|█▍        | 25/175 [00:07<00:50,  2.99it/s]

5351


 15%|█▍        | 26/175 [00:07<00:48,  3.08it/s]

5351
5351


 17%|█▋        | 29/175 [00:08<00:36,  3.96it/s]

5351
5351


 18%|█▊        | 31/175 [00:08<00:25,  5.57it/s]

5351
5351


 18%|█▊        | 32/175 [00:08<00:26,  5.34it/s]

5351
5351


 20%|██        | 35/175 [00:09<00:28,  4.83it/s]

5351
5351


 21%|██        | 36/175 [00:09<00:31,  4.35it/s]

5351
5351


 21%|██        | 37/175 [00:10<00:33,  4.07it/s]

5351


 22%|██▏       | 39/175 [00:10<00:29,  4.60it/s]

5351
5351


 23%|██▎       | 41/175 [00:10<00:25,  5.18it/s]

5351
5351


 25%|██▌       | 44/175 [00:11<00:26,  4.95it/s]

5351
5351


 26%|██▌       | 45/175 [00:11<00:23,  5.60it/s]

5351
5351


 27%|██▋       | 48/175 [00:12<00:19,  6.58it/s]

5351
5351


 28%|██▊       | 49/175 [00:12<00:19,  6.53it/s]

5351
5351


 29%|██▉       | 51/175 [00:12<00:26,  4.76it/s]

5351
5351


 31%|███       | 54/175 [00:13<00:18,  6.54it/s]

5351
5351


 32%|███▏      | 56/175 [00:13<00:15,  7.68it/s]

5351
5351


 33%|███▎      | 58/175 [00:13<00:13,  8.47it/s]

5351
5351


 34%|███▍      | 60/175 [00:13<00:13,  8.70it/s]

5351
5351


 35%|███▍      | 61/175 [00:13<00:13,  8.65it/s]

5351
5351


 37%|███▋      | 64/175 [00:14<00:12,  9.06it/s]

5351
5351


 37%|███▋      | 65/175 [00:14<00:13,  8.11it/s]

5351
5351


 39%|███▉      | 68/175 [00:15<00:18,  5.67it/s]

5351
5351


 40%|████      | 70/175 [00:15<00:15,  6.69it/s]

5351
5351


 41%|████      | 72/175 [00:15<00:13,  7.37it/s]

5351
5351


 42%|████▏     | 73/175 [00:15<00:13,  7.75it/s]

5351
5351


 43%|████▎     | 75/175 [00:16<00:19,  5.18it/s]

5351
5351


 43%|████▎     | 76/175 [00:16<00:21,  4.67it/s]

5351


 45%|████▍     | 78/175 [00:17<00:30,  3.22it/s]

5351
5351


 45%|████▌     | 79/175 [00:17<00:29,  3.29it/s]

5351


 46%|████▋     | 81/175 [00:18<00:26,  3.53it/s]

5351
5351


 47%|████▋     | 83/175 [00:18<00:27,  3.29it/s]

5351
5351


 49%|████▊     | 85/175 [00:19<00:24,  3.62it/s]

5351
5351


 50%|█████     | 88/175 [00:19<00:17,  4.90it/s]

5351
5351


 51%|█████     | 89/175 [00:20<00:16,  5.10it/s]

5351
5351


 51%|█████▏    | 90/175 [00:20<00:22,  3.70it/s]

5351


 53%|█████▎    | 92/175 [00:21<00:29,  2.79it/s]

5351
5351


 54%|█████▍    | 95/175 [00:21<00:16,  4.84it/s]

5351
5351


 55%|█████▍    | 96/175 [00:22<00:22,  3.57it/s]

5351
5351


 55%|█████▌    | 97/175 [00:22<00:25,  3.04it/s]

5351


 57%|█████▋    | 99/175 [00:23<00:29,  2.57it/s]

5351
5351


 58%|█████▊    | 102/175 [00:24<00:18,  3.92it/s]

5351
5351


 59%|█████▉    | 104/175 [00:24<00:12,  5.65it/s]

5351
5351


 61%|██████    | 106/175 [00:24<00:10,  6.89it/s]

5351
5351


 62%|██████▏   | 108/175 [00:24<00:08,  7.91it/s]

5351
5351


 63%|██████▎   | 110/175 [00:25<00:07,  8.50it/s]

5351
5351


 63%|██████▎   | 111/175 [00:25<00:15,  4.11it/s]

5351
5351


 65%|██████▍   | 113/175 [00:26<00:12,  5.03it/s]

5351
5351


 66%|██████▌   | 115/175 [00:27<00:21,  2.73it/s]

5351
5351


 67%|██████▋   | 117/175 [00:28<00:26,  2.22it/s]

5351


 68%|██████▊   | 119/175 [00:28<00:15,  3.65it/s]

5351
5351


 69%|██████▊   | 120/175 [00:28<00:12,  4.39it/s]

5351
5351


 70%|███████   | 123/175 [00:28<00:09,  5.66it/s]

5351
5351


 71%|███████▏  | 125/175 [00:29<00:07,  6.98it/s]

5351
5351


 72%|███████▏  | 126/175 [00:29<00:07,  6.85it/s]

5351
5351


 74%|███████▎  | 129/175 [00:29<00:06,  7.58it/s]

5351
5351
5351


 75%|███████▍  | 131/175 [00:29<00:05,  8.31it/s]

5351
5351


 76%|███████▌  | 133/175 [00:30<00:07,  5.50it/s]

5351
5351


 77%|███████▋  | 135/175 [00:30<00:06,  5.98it/s]

5351
5351


 79%|███████▉  | 138/175 [00:31<00:05,  6.22it/s]

5351
5351


 80%|████████  | 140/175 [00:31<00:04,  7.14it/s]

5351
5351


 81%|████████  | 142/175 [00:32<00:09,  3.62it/s]

5351
5351


 82%|████████▏ | 144/175 [00:32<00:05,  5.20it/s]

5351
5351


 83%|████████▎ | 145/175 [00:32<00:06,  4.49it/s]

5351
5351


 85%|████████▍ | 148/175 [00:33<00:05,  5.06it/s]

5351
5351


 85%|████████▌ | 149/175 [00:33<00:05,  5.18it/s]

5351
5351


 87%|████████▋ | 152/175 [00:34<00:05,  3.88it/s]

5351
5351


 88%|████████▊ | 154/175 [00:35<00:03,  5.31it/s]

5351
5351


 89%|████████▊ | 155/175 [00:35<00:04,  4.51it/s]

5351
5351


 90%|█████████ | 158/175 [00:36<00:03,  4.54it/s]

5351
5351


 91%|█████████ | 159/175 [00:36<00:03,  4.62it/s]

5351
5351


 93%|█████████▎| 162/175 [00:37<00:02,  5.00it/s]

5351
5351


 94%|█████████▎| 164/175 [00:37<00:01,  5.96it/s]

5351
5351


 94%|█████████▍| 165/175 [00:37<00:01,  6.28it/s]

5351
5351


 96%|█████████▌| 168/175 [00:37<00:00,  7.24it/s]

5351
5351
5351


 97%|█████████▋| 169/175 [00:38<00:01,  4.59it/s]

5351


 97%|█████████▋| 170/175 [00:38<00:01,  3.61it/s]

5351


 98%|█████████▊| 171/175 [00:39<00:01,  3.09it/s]

5351


 99%|█████████▉| 173/175 [00:40<00:00,  3.16it/s]

5351
5351


100%|██████████| 175/175 [00:40<00:00,  4.34it/s]
100%|██████████| 4442/4442 [00:00<00:00, 95214.02it/s]
  0%|          | 251/68050 [00:00<00:27, 2502.50it/s]

5351


100%|██████████| 68050/68050 [00:27<00:00, 2477.85it/s]


** Sampled 4 positive and 4 negative edges. **
** Sampled 4 positive and 4 negative edges. **
link_classification: using 'ip' method to combine node embeddings into edge embeddings
Epoch 1/20
1/1 - 2s - loss: 0.6356 - binary_accuracy: 0.6250 - val_loss: 0.7947 - val_binary_accuracy: 0.5000
Epoch 2/20
1/1 - 0s - loss: 0.6108 - binary_accuracy: 0.6250 - val_loss: 0.7608 - val_binary_accuracy: 0.5000
Epoch 3/20
1/1 - 0s - loss: 0.5261 - binary_accuracy: 0.6250 - val_loss: 0.7212 - val_binary_accuracy: 0.5000
Epoch 4/20
1/1 - 0s - loss: 0.5037 - binary_accuracy: 0.6250 - val_loss: 0.6855 - val_binary_accuracy: 0.5000
Epoch 5/20
1/1 - 0s - loss: 0.4376 - binary_accuracy: 0.7500 - val_loss: 0.6500 - val_binary_accuracy: 0.6250
Epoch 6/20
1/1 - 0s - loss: 0.2735 - binary_accuracy: 1.0000 - val_loss: 0.6154 - val_binary_accuracy: 0.7500
Epoch 7/20
1/1 - 0s - loss: 0.2598 - binary_accuracy: 1.0000 - val_loss: 0.5833 - val_binary_accuracy: 0.7500
Epoch 8/20
1/1 - 0s - loss: 0.3336 - binary_accur

  0%|          | 0/175 [00:00<?, ?it/s]

5351


  1%|          | 1/175 [00:00<00:28,  6.17it/s]

5351


  2%|▏         | 4/175 [00:00<00:34,  5.01it/s]

5351
5351


  3%|▎         | 6/175 [00:01<00:27,  6.17it/s]

5351
5351
5351


  5%|▍         | 8/175 [00:02<00:55,  3.02it/s]

5351
5351


  5%|▌         | 9/175 [00:02<01:04,  2.59it/s]

5351


  6%|▋         | 11/175 [00:03<00:54,  3.01it/s]

5351
5351


  8%|▊         | 14/175 [00:03<00:33,  4.83it/s]

5351
5351


  9%|▉         | 16/175 [00:04<00:38,  4.10it/s]

5351
5351


 10%|▉         | 17/175 [00:04<00:33,  4.69it/s]

5351
5351


 11%|█         | 19/175 [00:05<00:58,  2.66it/s]

5351
5351


 11%|█▏        | 20/175 [00:06<01:05,  2.37it/s]

5351
5351


 14%|█▎        | 24/175 [00:06<00:39,  3.84it/s]

5351
5351


 14%|█▍        | 25/175 [00:07<00:50,  2.99it/s]

5351
5351


 15%|█▍        | 26/175 [00:07<00:48,  3.07it/s]

5351


 17%|█▋        | 29/175 [00:08<00:36,  3.95it/s]

5351
5351


 18%|█▊        | 31/175 [00:08<00:25,  5.57it/s]

5351
5351


 18%|█▊        | 32/175 [00:08<00:26,  5.33it/s]

5351
5351


 20%|██        | 35/175 [00:09<00:29,  4.75it/s]

5351
5351


 21%|██        | 36/175 [00:09<00:31,  4.42it/s]

5351


 21%|██        | 37/175 [00:10<00:33,  4.07it/s]

5351
5351


 23%|██▎       | 40/175 [00:10<00:24,  5.49it/s]

5351
5351


 23%|██▎       | 41/175 [00:11<00:26,  5.06it/s]

5351
5351


 25%|██▌       | 44/175 [00:11<00:26,  4.91it/s]

5351
5351


 26%|██▌       | 45/175 [00:11<00:23,  5.61it/s]

5351
5351


 27%|██▋       | 48/175 [00:12<00:19,  6.63it/s]

5351
5351


 28%|██▊       | 49/175 [00:12<00:19,  6.59it/s]

5351
5351


 30%|██▉       | 52/175 [00:13<00:22,  5.38it/s]

5351
5351


 31%|███       | 54/175 [00:13<00:18,  6.40it/s]

5351
5351


 32%|███▏      | 56/175 [00:13<00:15,  7.52it/s]

5351
5351


 33%|███▎      | 58/175 [00:13<00:14,  8.22it/s]

5351
5351


 34%|███▍      | 60/175 [00:13<00:13,  8.45it/s]

5351
5351


 35%|███▌      | 62/175 [00:14<00:12,  8.81it/s]

5351
5351


 37%|███▋      | 64/175 [00:14<00:12,  8.84it/s]

5351
5351


 37%|███▋      | 65/175 [00:14<00:14,  7.77it/s]

5351
5351


 39%|███▉      | 68/175 [00:15<00:19,  5.37it/s]

5351
5351


 40%|████      | 70/175 [00:15<00:16,  6.37it/s]

5351
5351


 41%|████      | 72/175 [00:15<00:14,  7.19it/s]

5351
5351


 42%|████▏     | 73/175 [00:15<00:13,  7.58it/s]

5351
5351


 43%|████▎     | 75/175 [00:16<00:19,  5.17it/s]

5351
5351


 43%|████▎     | 76/175 [00:16<00:21,  4.68it/s]

5351


 45%|████▍     | 78/175 [00:17<00:30,  3.20it/s]

5351
5351


 46%|████▋     | 81/175 [00:18<00:26,  3.52it/s]

5351
5351


 47%|████▋     | 82/175 [00:18<00:22,  4.05it/s]

5351
5351


 49%|████▊     | 85/175 [00:19<00:25,  3.55it/s]

5351
5351


 50%|████▉     | 87/175 [00:20<00:21,  4.16it/s]

5351
5351


 50%|█████     | 88/175 [00:20<00:18,  4.74it/s]

5351
5351


 51%|█████     | 89/175 [00:20<00:17,  4.93it/s]

5351


 51%|█████▏    | 90/175 [00:20<00:23,  3.63it/s]

5351


 52%|█████▏    | 91/175 [00:21<00:27,  3.03it/s]

5351


 54%|█████▎    | 94/175 [00:22<00:20,  3.92it/s]

5351
5351


 54%|█████▍    | 95/175 [00:22<00:17,  4.68it/s]

5351
5351


 55%|█████▍    | 96/175 [00:22<00:22,  3.51it/s]

5351


 56%|█████▌    | 98/175 [00:23<00:28,  2.69it/s]

5351
5351


 57%|█████▋    | 99/175 [00:24<00:30,  2.50it/s]

5351


 58%|█████▊    | 102/175 [00:24<00:18,  3.85it/s]

5351
5351


 59%|█████▉    | 103/175 [00:24<00:15,  4.71it/s]

5351
5351


 61%|██████    | 106/175 [00:25<00:10,  6.64it/s]

5351
5351


 62%|██████▏   | 108/175 [00:25<00:08,  7.64it/s]

5351
5351


 63%|██████▎   | 110/175 [00:25<00:07,  8.31it/s]

5351
5351


 63%|██████▎   | 111/175 [00:26<00:15,  4.13it/s]

5351
5351


 65%|██████▍   | 113/175 [00:26<00:12,  5.03it/s]

5351
5351


 65%|██████▌   | 114/175 [00:27<00:18,  3.33it/s]

5351


 66%|██████▌   | 115/175 [00:27<00:22,  2.70it/s]

5351


 67%|██████▋   | 118/175 [00:28<00:19,  2.86it/s]

5351
5351


 69%|██████▊   | 120/175 [00:28<00:12,  4.36it/s]

5351
5351


 70%|██████▉   | 122/175 [00:29<00:10,  5.28it/s]

5351
5351


 71%|███████   | 124/175 [00:29<00:08,  6.18it/s]

5351
5351


 72%|███████▏  | 126/175 [00:29<00:07,  6.71it/s]

5351
5351


 73%|███████▎  | 128/175 [00:30<00:07,  6.70it/s]

5351
5351


 74%|███████▍  | 130/175 [00:30<00:05,  7.98it/s]

5351
5351


 75%|███████▍  | 131/175 [00:30<00:05,  8.08it/s]

5351
5351


 76%|███████▌  | 133/175 [00:30<00:08,  5.09it/s]

5351


 77%|███████▋  | 135/175 [00:31<00:07,  5.65it/s]

5351
5351


 78%|███████▊  | 137/175 [00:31<00:07,  5.29it/s]

5351
5351


 79%|███████▉  | 139/175 [00:31<00:05,  6.60it/s]

5351
5351


 80%|████████  | 140/175 [00:32<00:05,  7.00it/s]

5351
5351


 82%|████████▏ | 143/175 [00:33<00:07,  4.41it/s]

5351
5351


 82%|████████▏ | 144/175 [00:33<00:05,  5.25it/s]

5351
5351


 84%|████████▍ | 147/175 [00:34<00:06,  4.23it/s]

5351
5351


 85%|████████▍ | 148/175 [00:34<00:05,  5.05it/s]

5351
5351


 85%|████████▌ | 149/175 [00:34<00:04,  5.24it/s]

5351


 87%|████████▋ | 152/175 [00:35<00:05,  3.91it/s]

5351
5351


 88%|████████▊ | 154/175 [00:35<00:03,  5.36it/s]

5351
5351


 89%|████████▊ | 155/175 [00:36<00:04,  4.52it/s]

5351
5351


 90%|█████████ | 158/175 [00:36<00:03,  4.58it/s]

5351
5351


 91%|█████████ | 159/175 [00:37<00:03,  4.79it/s]

5351
5351


 93%|█████████▎| 162/175 [00:37<00:02,  5.05it/s]

5351
5351


 94%|█████████▎| 164/175 [00:37<00:01,  6.11it/s]

5351
5351


 94%|█████████▍| 165/175 [00:38<00:01,  6.40it/s]

5351
5351


 96%|█████████▌| 168/175 [00:38<00:00,  7.38it/s]

5351
5351


 97%|█████████▋| 169/175 [00:38<00:01,  4.35it/s]

5351
5351


 98%|█████████▊| 171/175 [00:39<00:01,  2.90it/s]

5351


 99%|█████████▉| 173/175 [00:40<00:00,  3.08it/s]

5351
5351


100%|██████████| 175/175 [00:40<00:00,  4.29it/s]

5351
5351



100%|██████████| 4442/4442 [00:00<00:00, 106003.67it/s]
100%|██████████| 68050/68050 [00:27<00:00, 2492.96it/s]


** Sampled 1 positive and 1 negative edges. **
** Sampled 1 positive and 1 negative edges. **
link_classification: using 'ip' method to combine node embeddings into edge embeddings
Epoch 1/20
1/1 - 2s - loss: 0.8774 - binary_accuracy: 0.5000 - val_loss: 0.6795 - val_binary_accuracy: 0.5000
Epoch 2/20
1/1 - 0s - loss: 0.1548 - binary_accuracy: 1.0000 - val_loss: 0.6770 - val_binary_accuracy: 0.5000
Epoch 3/20
1/1 - 0s - loss: 0.4209 - binary_accuracy: 1.0000 - val_loss: 0.6715 - val_binary_accuracy: 0.5000
Epoch 4/20
1/1 - 0s - loss: 0.9728 - binary_accuracy: 0.5000 - val_loss: 0.6618 - val_binary_accuracy: 0.5000
Epoch 5/20
1/1 - 0s - loss: 0.2763 - binary_accuracy: 1.0000 - val_loss: 0.6486 - val_binary_accuracy: 0.5000
Epoch 6/20
1/1 - 0s - loss: 0.3792 - binary_accuracy: 1.0000 - val_loss: 0.6367 - val_binary_accuracy: 0.5000
Epoch 7/20
1/1 - 0s - loss: 0.6093 - binary_accuracy: 0.5000 - val_loss: 0.6259 - val_binary_accuracy: 0.5000
Epoch 8/20
1/1 - 0s - loss: 0.3842 - binary_accur

  0%|          | 0/175 [00:00<?, ?it/s]

5351


  1%|          | 1/175 [00:00<00:29,  5.92it/s]

5351


  2%|▏         | 4/175 [00:00<00:34,  4.98it/s]

5351
5351


  3%|▎         | 6/175 [00:01<00:27,  6.16it/s]

5351
5351
5351


  5%|▍         | 8/175 [00:02<00:52,  3.17it/s]

5351
5351


  6%|▋         | 11/175 [00:03<00:51,  3.20it/s]

5351
5351


  7%|▋         | 13/175 [00:03<00:36,  4.43it/s]

5351
5351


  8%|▊         | 14/175 [00:03<00:31,  5.03it/s]

5351
5351


 10%|▉         | 17/175 [00:04<00:33,  4.78it/s]

5351
5351
5351


 11%|█         | 19/175 [00:05<00:58,  2.67it/s]

5351


 12%|█▏        | 21/175 [00:05<00:50,  3.08it/s]

5351
5351


 13%|█▎        | 23/175 [00:06<00:47,  3.18it/s]

5351
5351


 14%|█▎        | 24/175 [00:06<00:38,  3.95it/s]

5351
5351


 14%|█▍        | 25/175 [00:07<00:51,  2.93it/s]

5351


 15%|█▍        | 26/175 [00:07<00:48,  3.07it/s]

5351


 17%|█▋        | 29/175 [00:08<00:38,  3.84it/s]

5351
5351


 18%|█▊        | 31/175 [00:08<00:27,  5.17it/s]

5351
5351


 18%|█▊        | 32/175 [00:08<00:28,  5.07it/s]

5351


 19%|█▉        | 33/175 [00:09<00:37,  3.75it/s]

5351
5351


 20%|██        | 35/175 [00:09<00:29,  4.76it/s]

5351
5351


 21%|██        | 37/175 [00:10<00:33,  4.13it/s]

5351
5351


 23%|██▎       | 40/175 [00:10<00:24,  5.50it/s]

5351
5351


 23%|██▎       | 41/175 [00:10<00:26,  5.05it/s]

5351
5351


 25%|██▌       | 44/175 [00:11<00:26,  4.90it/s]

5351
5351


 26%|██▋       | 46/175 [00:11<00:20,  6.39it/s]

5351
5351


 27%|██▋       | 47/175 [00:11<00:19,  6.61it/s]

5351
5351


 28%|██▊       | 49/175 [00:12<00:19,  6.52it/s]

5351
5351


 30%|██▉       | 52/175 [00:12<00:23,  5.32it/s]

5351
5351


 31%|███       | 54/175 [00:13<00:18,  6.43it/s]

5351
5351


 32%|███▏      | 56/175 [00:13<00:15,  7.46it/s]

5351
5351


 33%|███▎      | 58/175 [00:13<00:14,  8.23it/s]

5351
5351


 34%|███▍      | 60/175 [00:13<00:13,  8.49it/s]

5351
5351


 35%|███▌      | 62/175 [00:14<00:12,  8.72it/s]

5351
5351


 37%|███▋      | 64/175 [00:14<00:12,  8.68it/s]

5351
5351


 37%|███▋      | 65/175 [00:14<00:14,  7.59it/s]

5351
5351


 39%|███▉      | 68/175 [00:15<00:19,  5.43it/s]

5351
5351


 40%|████      | 70/175 [00:15<00:16,  6.52it/s]

5351
5351


 41%|████      | 72/175 [00:15<00:14,  7.31it/s]

5351
5351


 42%|████▏     | 73/175 [00:15<00:13,  7.66it/s]

5351
5351


 42%|████▏     | 74/175 [00:16<00:15,  6.60it/s]

5351


 43%|████▎     | 76/175 [00:16<00:21,  4.70it/s]

5351
5351


 45%|████▍     | 78/175 [00:17<00:30,  3.17it/s]

5351
5351


 46%|████▌     | 80/175 [00:18<00:33,  2.84it/s]

5351


 46%|████▋     | 81/175 [00:18<00:27,  3.37it/s]

5351
5351


 47%|████▋     | 82/175 [00:18<00:24,  3.79it/s]

5351


 47%|████▋     | 83/175 [00:19<00:30,  3.03it/s]

5351


 49%|████▊     | 85/175 [00:19<00:26,  3.46it/s]

5351
5351


 50%|█████     | 88/175 [00:20<00:18,  4.79it/s]

5351
5351


 51%|█████     | 89/175 [00:20<00:17,  4.97it/s]

5351
5351


 52%|█████▏    | 91/175 [00:21<00:27,  3.05it/s]

5351
5351


 53%|█████▎    | 93/175 [00:21<00:25,  3.21it/s]

5351
5351


 54%|█████▍    | 95/175 [00:22<00:16,  4.79it/s]

5351
5351


 55%|█████▍    | 96/175 [00:22<00:22,  3.55it/s]

5351


 55%|█████▌    | 97/175 [00:23<00:26,  2.98it/s]

5351


 57%|█████▋    | 99/175 [00:24<00:30,  2.46it/s]

5351
5351


 58%|█████▊    | 102/175 [00:24<00:19,  3.79it/s]

5351
5351


 59%|█████▉    | 104/175 [00:24<00:12,  5.50it/s]

5351
5351


 61%|██████    | 106/175 [00:25<00:10,  6.76it/s]

5351
5351


 62%|██████▏   | 108/175 [00:25<00:08,  7.82it/s]

5351
5351


 63%|██████▎   | 110/175 [00:25<00:07,  8.24it/s]

5351
5351
5351


 65%|██████▍   | 113/175 [00:26<00:12,  4.91it/s]

5351
5351


 65%|██████▌   | 114/175 [00:27<00:18,  3.28it/s]

5351
5351


 66%|██████▋   | 116/175 [00:28<00:24,  2.39it/s]

5351
5351


 68%|██████▊   | 119/175 [00:28<00:15,  3.60it/s]

5351
5351


 69%|██████▊   | 120/175 [00:28<00:12,  4.33it/s]

5351
5351


 70%|██████▉   | 122/175 [00:29<00:10,  5.28it/s]

5351
5351


 71%|███████▏  | 125/175 [00:29<00:07,  6.85it/s]

5351
5351


 72%|███████▏  | 126/175 [00:29<00:07,  6.80it/s]

5351
5351


 73%|███████▎  | 128/175 [00:30<00:06,  6.80it/s]

5351
5351
5351


 75%|███████▍  | 131/175 [00:30<00:05,  8.28it/s]

5351
5351


 76%|███████▌  | 133/175 [00:30<00:07,  5.36it/s]

5351
5351


 77%|███████▋  | 135/175 [00:31<00:06,  5.88it/s]

5351
5351


 79%|███████▉  | 138/175 [00:31<00:05,  6.19it/s]

5351
5351


 80%|████████  | 140/175 [00:32<00:04,  7.12it/s]

5351
5351
5351


 82%|████████▏ | 143/175 [00:33<00:07,  4.04it/s]

5351
5351


 82%|████████▏ | 144/175 [00:33<00:06,  4.75it/s]

5351
5351


 84%|████████▍ | 147/175 [00:34<00:07,  3.78it/s]

5351
5351


 85%|████████▍ | 148/175 [00:34<00:06,  4.49it/s]

5351
5351


 85%|████████▌ | 149/175 [00:34<00:05,  4.61it/s]

5351


 86%|████████▋ | 151/175 [00:35<00:08,  2.96it/s]

5351
5351


 88%|████████▊ | 154/175 [00:35<00:04,  5.19it/s]

5351
5351


 89%|████████▊ | 155/175 [00:36<00:04,  4.49it/s]

5351


 89%|████████▉ | 156/175 [00:36<00:06,  3.15it/s]

5351


 90%|█████████ | 158/175 [00:37<00:03,  4.48it/s]

5351
5351


 91%|█████████ | 159/175 [00:37<00:03,  4.69it/s]

5351
5351


 93%|█████████▎| 162/175 [00:37<00:02,  5.00it/s]

5351
5351


 94%|█████████▎| 164/175 [00:38<00:01,  5.97it/s]

5351
5351


 94%|█████████▍| 165/175 [00:38<00:01,  6.32it/s]

5351
5351


 96%|█████████▌| 168/175 [00:38<00:00,  7.24it/s]

5351
5351
5351


 97%|█████████▋| 170/175 [00:39<00:01,  3.34it/s]

5351
5351


 98%|█████████▊| 171/175 [00:40<00:01,  2.91it/s]

5351


 99%|█████████▉| 174/175 [00:40<00:00,  3.69it/s]

5351
5351


100%|██████████| 175/175 [00:41<00:00,  4.26it/s]
100%|██████████| 4442/4442 [00:00<00:00, 107409.23it/s]
  0%|          | 243/68050 [00:00<00:28, 2421.60it/s]

5351


100%|██████████| 68050/68050 [00:27<00:00, 2469.28it/s]


IndexError: single positional indexer is out-of-bounds

In [5]:
data.adj

tensor(indices=tensor([[ 224,    5,  224,  ...,  676,  237,  617],
                       [   5,  224,  470,  ..., 2630,  617,  237]]),
       values=tensor([550808., 550808., 119934.,  ...,   8884.,   8884.,
                        8884.]),
       size=(5351, 5351), nnz=120444, layout=torch.sparse_coo)

In [7]:
edges = set()
for user, items in tqdm(data.invocation_mx.items()):
        for i in range(len(items)):
            for j in range(i+1,len(items)):
                edges.add((items[i],items[j]))

100%|██████████| 4442/4442 [00:00<00:00, 37185.82it/s]


In [12]:
max = 0
for edg in tqdm(edges):
    weight = float(data.adj[edg[0],edg[1]])
    if weight>max:
        max = weight
print(max)

100%|██████████| 68050/68050 [00:27<00:00, 2483.96it/s]

1416998.0





In [11]:
count = 0
for user, items in tqdm(data.invocation_mx.items()):
    count = count + len(items)
print(count)

100%|██████████| 4442/4442 [00:00<00:00, 2037076.14it/s]

27943





In [11]:
link_q_test = torch.zeros(size=[10])
link_q_test[0] = 0.001
link_q_test[2] = -0.1
link_q_test[5] = 0.3
link_q_test[8] = 0.4
link_q_test

tensor([ 0.0010,  0.0000, -0.1000,  0.0000,  0.0000,  0.3000,  0.0000,  0.0000,
         0.4000,  0.0000])

In [13]:
link_mean = torch.Tensor.mean(link_q_test)
link_var = torch.Tensor.var(link_q_test,False)


tensor(0.0224)

In [20]:

output = F.normalize(link_q_test, p=2, dim=0)
output

tensor([ 0.0020,  0.0000, -0.1961,  0.0000,  0.0000,  0.5883,  0.0000,  0.0000,
         0.7845,  0.0000])