In [1]:
import os
import numpy as np
import pandas as pd
from scipy import sparse
from tqdm import tqdm
import torch
import torch.optim as optim
from datasets import *
from utils import *
from model.EdgeReg import *
from model.EdgeReg_v2 import *

In [2]:
# Expose only GPU index 3 to this process (hard-coded, machine-specific value).
# NOTE(review): this is set after `import torch`; it needs to run before the
# first CUDA query below to have any effect — keep these two lines in this order.
os.environ["CUDA_VISIBLE_DEVICES"]="3"
# Use the first visible CUDA device when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#########################################################################################################

# Dataset identifier and the directory it is loaded from (dataset/clean/<dataset_name>).
dataset_name = "ng20"
data_dir = os.path.join('dataset/clean', dataset_name)

# Mini-batch sizes handed to the two DataLoaders below.
train_batch_size=100
test_batch_size=100

# TextDataset comes from the project's star imports; one instance per split.
train_set = TextDataset(dataset_name, data_dir, subset='train')
test_set = TextDataset(dataset_name, data_dir, subset='test')
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=train_batch_size, shuffle=True)
# NOTE(review): the test loader is shuffled as well — confirm this is intended for evaluation.
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=test_batch_size, shuffle=True)

#########################################################################################################
# Number of classes as reported by the training split.
y_dim = train_set.num_classes()
# Passed to EdgeReg_v2 as its fourth positional argument (presumably the hash-code length in bits — confirm).
num_bits = 32
# Length along dim 0 of the second element of the first training sample.
num_features = train_set[0][1].size(0)
# One node per training sample.
num_nodes = len(train_set)
# Not referenced by any cell visible in this notebook section.
edge_weight = 1.0

# print("Train node2hash model ...")
# print("dataset: {}".format(args.dataset))
# print("numbits: {}".format(args.nbits))
# print("gpu id:  {}".format(args.gpunum))
# print("dropout probability: {}".format(args.dropout))
# print("num epochs: {}".format(args.num_epochs))
# print("learning rate: {}".format(args.lr))
# print("num train: {} num test: {}".format(len(train_set), len(test_set)))

#########################################################################################################

# The walk spec has the form "<strategy>-<max nodes>"; it is hard-coded here
# in place of the command-line value.
walk_type, max_nodes = "BFS-20".split('-')  # args.walk.split('-')
max_nodes = int(max_nodes)
print("Walk type: {} with maximum nodes of: {}".format(walk_type, max_nodes))

# Resolve the strategy name to its traversal function. An unrecognised name is
# tolerated rather than asserted on: the sampler stays None, and the batch cell
# only calls get_neighbors when the sampler is not None.
neighbor_sample_func = None
if walk_type == 'BFS':
    neighbor_sample_func = BFS_walk
elif walk_type == 'DFS':
    neighbor_sample_func = DFS_walk
elif walk_type == 'Random':
    neighbor_sample_func = Random_walk

if neighbor_sample_func is None:
    print("The model will only takes the immediate neighbors.")

def get_neighbors(ids, df, max_nodes, batch_size, traversal_func):
    """Build a dense neighbourhood indicator matrix for a batch of nodes.

    Args:
        ids: iterable of scalar tensors (each must support ``.item()``), one
            node id per batch row.
        df: graph container handed unchanged to ``traversal_func``; only its
            ``len()`` is used here, as the number of columns.
        max_nodes: node budget forwarded to ``traversal_func``.
        batch_size: number of rows of the returned matrix.
        traversal_func: callable ``(df, node_id, max_nodes) -> list`` of column
            indices reached from ``node_id``.

    Returns:
        A ``torch.FloatTensor`` of shape ``(batch_size, len(df))``. Entry
        ``[i, j]`` counts how often column ``j`` appears in the walk started
        from ``ids[i]`` (repeated indices are summed by SciPy).
    """
    row_idx, col_idx = [], []
    for row, node_id in enumerate(ids):
        reached = traversal_func(df, node_id.item(), max_nodes)
        # Every node reached from this start belongs to the same batch row.
        row_idx.extend([row] * len(reached))
        col_idx.extend(reached)

    ones = [1] * len(col_idx)
    adjacency = sparse.csr_matrix(
        (ones, (row_idx, col_idx)),
        shape=(batch_size, len(df)),
    )
    dense = adjacency.toarray()
    return torch.from_numpy(dense).type(torch.FloatTensor)


AttributeError: 'list' object has no attribute 'toarray'

In [None]:
# Instantiate the v2 model with the sizes derived from the training split and
# move its parameters onto the selected device.
model = EdgeReg_v2(
    dataset_name,
    num_features,
    num_nodes,
    num_bits,
    dropoutProb=0.1,
    device=device,
    T=5,
)
model.to(device)

In [None]:
# Grab a single mini-batch for interactive inspection. next(iter(...)) states
# that intent directly, leaves no abandoned progress bar behind, and fails
# loudly (StopIteration) if the loader is empty instead of leaving xb/yb unset.
ids, xb, yb, nb = next(iter(train_loader))
xb = xb.to(device)
yb = yb.to(device)

# With a walk strategy configured, replace the neighbour tensor supplied by the
# dataset with one sampled by that strategy (one row per sample in the batch).
if neighbor_sample_func is not None:
    nb = get_neighbors(ids, train_set.df, max_nodes, xb.size(0), neighbor_sample_func)
nb = nb.to(device)

In [None]:
# Forward pass on the sampled batch; the model's return value is unpacked into four outputs.
# Presumably: word log-probs, neighbour log-probs, latent mean, latent log-variance — confirm in EdgeReg_v2.
logprob_w, logprob_nn, mu, logvar = model(xb)

In [None]:
# KL term computed from mu / logvar, then displayed as the cell output.
# NOTE(review): this calls the helper on EdgeReg, while the other loss cells use
# EdgeReg_v2 — confirm that mixing the two classes here is intentional.
kl_loss = EdgeReg.calculate_KL_loss(mu, logvar)
kl_loss

In [None]:
# Loss returned by EdgeReg_v2.compute_reconstr_loss for logprob_w against the input batch xb.
reconstr_loss = EdgeReg_v2.compute_reconstr_loss(logprob_w, xb)

In [None]:
# Display the loss value computed in the previous cell.
reconstr_loss

In [None]:
# Loss returned by EdgeReg_v2.compute_edge_reconstr_loss for logprob_nn against the neighbour tensor nb.
nn_reconstr_loss = EdgeReg_v2.compute_edge_reconstr_loss(logprob_nn, nb)

In [None]:
# Display the loss value computed in the previous cell.
nn_reconstr_loss

In [None]:
# T was never bound to a name in this notebook (it is only passed as the literal
# T=5 when the model is constructed), so this cell raised NameError on a fresh
# kernel. Keep this value equal to the T given to EdgeReg_v2.
T = 5

# Standard-normal noise of shape (batch, T, latent), allocated with the same
# dtype and device as mu.
eps = mu.new_empty((mu.size(0), T, mu.size(1))).normal_()

In [None]:
# Standard deviation from the log-variance, with a singleton axis inserted at
# position 1 so it broadcasts against the (batch, T, latent) noise tensor.
std = logvar.exp().sqrt().unsqueeze(1)
# NOTE: mu is rebound here, so re-running this cell adds one more axis each time.
mu = mu.unsqueeze(1)

In [None]:
# Shift and scale the noise: z = mu + eps * std (broadcast over axis 1).
z = mu + eps * std

In [None]:
# Inspect the shape of the sampled latent tensor.
z.size()

In [None]:
# Collapse the first two axes of z so the decoder sees a 2-D batch, then restore
# them afterwards. All sizes are read from z itself: the original referenced an
# undefined `batch_size` and hard-coded the latent width as 32.
log_prob_w = model.decoder(z.view(-1, z.size(-1)))
log_prob_w = log_prob_w.view(z.size(0), z.size(1), -1)

In [None]:
# Inspect the decoder output built in the previous cell. The original referenced
# `prob_w`, a name that is never defined in this notebook.
log_prob_w.size()

In [None]:
# Negative mean over all remaining axes of the per-sample sum (over axis 2) of
# log_prob_w weighted by xb; xb gets a singleton axis 1 so it broadcasts.
-(log_prob_w * xb.unsqueeze(1)).sum(dim=2).mean()

In [None]:
# Standard-normal noise shaped like mu. randn_like follows mu's dtype and
# device, so this also works on the CPU fallback selected at the top of the
# notebook; torch.cuda.FloatTensor raised whenever CUDA was unavailable.
eps = torch.randn_like(mu)
eps.size()

In [None]:
def reparametrize(self, mu, logvar):
    """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

    Args:
        self: unused; kept so the function can be attached to a model class.
        mu: tensor of means.
        logvar: tensor of log-variances, broadcastable with ``mu``.

    Returns:
        A new tensor with the dtype and device of the inputs. Gradients flow
        through ``mu`` and ``logvar`` but not through the sampled noise.
    """
    # exp(0.5 * logvar) equals sqrt(exp(logvar)) but overflows only at twice
    # the log-variance.
    std = torch.exp(0.5 * logvar)
    # randn_like draws on std's own device and dtype; the previous
    # torch.cuda.FloatTensor call failed on CPU-only machines and required the
    # legacy Variable wrapper.
    eps = torch.randn_like(std)
    return mu + eps * std