Skip to content

Commit

Permalink
documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Junxiang Wang committed Mar 31, 2024
1 parent 7002a56 commit 1e2b5a8
Show file tree
Hide file tree
Showing 7 changed files with 333 additions and 38 deletions.
53 changes: 50 additions & 3 deletions GNN/GCNSI/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,27 @@ def train(self, adj, train_dataset, alpha_list=[0.001,0.01, 0.1], thres_list=[0.
- opt_pred (numpy.ndarray): Predicted seed vectors of the training set given opt_alpha; each column is the prediction for one simulation. It can be used to adjust thres_list.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.GCNSI.main import GCNSI
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
gcnsi = GCNSI()
gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset)
print("GCNSI:")
print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
"""
# Compute Laplacian matrix
S = csgraph.laplacian(adj, normed=True)
Expand Down Expand Up @@ -147,6 +168,32 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres):
Returns:
- metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC score.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.GCNSI.main import GCNSI
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
gcnsi = GCNSI()
gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset)
print("GCNSI:")
print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
metric = gcnsi.test(adj, test_dataset, gcnsi_model, alpha, thres)
print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}")
"""
# Compute Laplacian matrix
S = csgraph.laplacian(adj, normed=True)
Expand All @@ -171,9 +218,9 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres):
pred = torch.softmax(pred, dim=1)
pred = pred[:, 1].squeeze(-1).detach().numpy()
test_acc += accuracy_score(seed_vec, pred >= thres)
test_pr += precision_score(seed_vec, pred >= thres)
test_re += recall_score(seed_vec, pred >= thres)
test_f1 += f1_score(seed_vec, pred >= thres)
test_pr += precision_score(seed_vec, pred >= thres, zero_division = 1)
test_re += recall_score(seed_vec, pred >= thres, zero_division = 1)
test_f1 += f1_score(seed_vec, pred >= thres, zero_division = 1)
test_auc += roc_auc_score(seed_vec, pred)

test_acc = test_acc / test_num
Expand Down
88 changes: 81 additions & 7 deletions GNN/IVGD/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,26 @@ def train_diffusion(self, adj, train_dataset):
Returns:
- torch.nn.Module: Trained diffusion model.
- diffusion_model (torch.nn.Module): Trained diffusion model.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.IVGD.main import IVGD
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
ivgd = IVGD()
diffusion_model = ivgd.train_diffusion(adj, train_dataset)
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_node = adj.shape[0]
Expand Down Expand Up @@ -184,7 +203,7 @@ def train_diffusion(self, adj, train_dataset):
print(f"run time per epoch:{result['runtime_perepoch']:.3f} seconds")
return diffusion_model

def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =0.001, num_epoch = 10):
def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =1e-3, weight_decay = 1e-4, num_epoch = 10):
"""
Train the IVGD model.
Expand All @@ -200,6 +219,8 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5,
- lr (float): Learning rate.
- weight_decay (float): Weight decay.
- num_epoch (int): Number of epochs for training.
Returns:
Expand All @@ -213,16 +234,41 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5,
- opt_f1 (float): Optimal F1 score.
- pred (numpy.ndarray): Predicted seed vectors of the training set; each column is the prediction for one simulation. It can be used to adjust thres_list.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.IVGD.main import IVGD
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
ivgd = IVGD()
diffusion_model = ivgd.train_diffusion(adj, train_dataset)
ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model)
print("IVGD:")
print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
"""
criterion = nn.CrossEntropyLoss()
train_num = len(train_dataset)
#train_num = len(train_dataset)
num_node = adj.shape[0]
alpha = 1
tau = 1
rho = 1e-3
lamda = 0
ivgd = IVGD_model(alpha=alpha, tau=tau, rho=rho)
optimizer = optim.Adam(ivgd.parameters(), lr=lr)
optimizer = optim.Adam(ivgd.parameters(), lr = lr, weight_decay = weight_decay)
ivgd.train()
train_num = len(train_dataset)
for i,influ_mat in enumerate(train_dataset):
Expand Down Expand Up @@ -303,6 +349,34 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres):
Returns:
- metric (Metric): Object containing test metrics.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.IVGD.main import IVGD
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
ivgd = IVGD()
diffusion_model = ivgd.train_diffusion(adj, train_dataset)
ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model)
print("IVGD:")
print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
metric = ivgd.test(test_dataset, diffusion_model, ivgd_model, thres)
print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}")
"""
test_num = len(test_dataset)
test_acc = 0
Expand Down Expand Up @@ -332,9 +406,9 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres):

# Compute metrics
test_acc += accuracy_score(seed_vec, seed_correction >= thres)
test_pr += precision_score(seed_vec, seed_correction >= thres)
test_re += recall_score(seed_vec, seed_correction >= thres)
test_f1 += f1_score(seed_vec, seed_correction >= thres)
test_pr += precision_score(seed_vec, seed_correction >= thres, zero_division = 1)
test_re += recall_score(seed_vec, seed_correction >= thres, zero_division = 1)
test_f1 += f1_score(seed_vec, seed_correction >= thres, zero_division = 1)
test_auc += roc_auc_score(seed_vec, seed_correction)

# Compute average metrics
Expand Down
56 changes: 43 additions & 13 deletions GNN/SLVAE/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from Evaluation import Metric
class SLVAE_model(nn.Module):
"""
Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules.
Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules.
Attributes:
- vae (nn.Module): Variational Autoencoder module.
Expand Down Expand Up @@ -73,6 +73,8 @@ def forward(self, seed_vec, train_mode):
predictions = self.gnn(seed_vec)
predictions = self.propagate(predictions)

predictions = torch.transpose(predictions, 0, 1)

# Return reconstructed seed vector, mean, log variance, and predictions
return seed_hat, mean, log_var, predictions

Expand Down Expand Up @@ -148,15 +150,15 @@ class SLVAE:
"""
Implement the Source Localization Variational Autoencoder (SLVAE) model.
Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020.
Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020.
"""

def __init__(self):
"""
Initialize the SLVAE model.
"""

def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.001, num_epoch = 50,print_epoch =10):
def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr = 1e-3, weight_decay = 1e-4, num_epoch = 50,print_epoch =10):
"""
Train the SLVAE model.
Expand All @@ -170,6 +172,8 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
- lr (float): Learning rate.
- weight_decay (float): Weight decay.
- num_epoch (int): Number of training epochs.
- print_epoch (int): Interval, in epochs, at which the loss is printed.
Expand All @@ -187,6 +191,28 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
- opt_f1 (float): Optimal F1 score.
- pred (numpy.ndarray): Predicted seed vectors of the training set; each column is the prediction for one simulation. It can be used to adjust thres_list.
Example:
from data.utils import load_dataset, diffusion_generation, split_dataset
from GNN.SLVAE.main import SLVAE
data_name = 'karate'
graph = load_dataset(data_name)
dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
adj, train_dataset, test_dataset =split_dataset(dataset)
slvae = SLVAE()
slvae_model, seed_vae_train, thres, auc, f1, pred = slvae.train(adj, train_dataset)
print("SLVAE:")
print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_node = adj.shape[0]
Expand All @@ -198,7 +224,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
v = torch.FloatTensor(values)
shape = adj.shape

adj_matrix = torch.sparse.FloatTensor(i, v, torch.Size(shape)).to_dense()
adj_matrix = torch.sparse_coo_tensor(i, v, torch.Size(shape)).to_dense()
train_num = len(train_dataset)
vae = VAE().to(device)
gnn = GNN(adj_matrix=adj_matrix)
Expand Down Expand Up @@ -227,7 +253,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
loss.backward()
optimizer.step()
average_loss = overall_loss/train_num
if epoch % print_epoch ==0:
if epoch % print_epoch == 0:
print(f"epoch = {epoch}, loss = {average_loss:.3f}")

# Evaluation
Expand All @@ -250,7 +276,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
for seed in seed_infer:
seed.requires_grad = True

optimizer = Adam(seed_infer, lr=lr)
optimizer = Adam(seed_infer, lr = lr, weight_decay = weight_decay)

for epoch in range(num_epoch):
overall_loss = 0
Expand All @@ -268,8 +294,9 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.

average_loss = overall_loss/train_num


if epoch % print_epoch ==0:
print(f"epoch = {epoch}")
print(f"epoch = {epoch}, obj = {average_loss:.4f}")

train_auc = 0
for i, influ_mat in enumerate(train_dataset):
Expand All @@ -287,7 +314,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
seed_vec = influ_mat[:, 0]
seed_vec = seed_vec.squeeze(-1).detach().numpy()
seed_pred = seed_infer[i].detach().numpy()
train_f1 += f1_score(seed_vec, seed_pred >= thres)
train_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1)
train_f1 = train_f1 / train_num
print(f"thres = {thres:.3f}, train_f1 = {train_f1:.3f}")
if train_f1 > opt_f1:
Expand Down Expand Up @@ -357,11 +384,14 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e

overall_loss += loss.item()

average_loss = overall_loss / test_num

loss.backward()
optimizer.step()


if epoch % print_epoch ==0:
print(f"epoch = {epoch}")
if epoch % print_epoch == 0:
print(f"epoch = {epoch}, obj = {average_loss:.4f}")

test_acc = 0
test_pr = 0
Expand All @@ -374,9 +404,9 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e
seed_vec = seed_vec.squeeze(-1).detach().numpy()
seed_pred = seed_infer[i].detach().numpy()
test_acc += accuracy_score(seed_vec, seed_pred >= thres)
test_pr += precision_score(seed_vec, seed_pred >= thres)
test_re += recall_score(seed_vec, seed_pred >= thres)
test_f1 += f1_score(seed_vec, seed_pred >= thres)
test_pr += precision_score(seed_vec, seed_pred >= thres, zero_division = 1)
test_re += recall_score(seed_vec, seed_pred >= thres, zero_division = 1)
test_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1)
test_auc += roc_auc_score(seed_vec, seed_pred)

test_acc = test_acc / test_num
Expand Down
2 changes: 1 addition & 1 deletion GNN/SLVAE/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def forward(self, seed_vec):
mat = self.adj_matrix.T @ attr_mat[-1]
attr_mat = torch.cat((attr_mat, mat.unsqueeze(0)), 0)

layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.T)))
layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.permute(*torch.arange(attr_mat.ndim - 1, -1, -1)))))
for fc in self.fcs[1:-1]:
layer_inner = self.act_fn(fc(layer_inner))
res = torch.sigmoid(self.fcs[-1](self.dropout(layer_inner)))
Expand Down
Loading

0 comments on commit 1e2b5a8

Please sign in to comment.