documentation

xianggebenben · Mar 31, 2024 · 1e2b5a8 · 1e2b5a8
1 parent 7002a56
commit 1e2b5a8
Show file tree

Hide file tree

Showing 7 changed files with 333 additions and 38 deletions.
diff --git a/GNN/GCNSI/main.py b/GNN/GCNSI/main.py
@@ -53,6 +53,27 @@ def train(self, adj, train_dataset, alpha_list=[0.001,0.01, 0.1], thres_list=[0.
 
         - opt_pred (numpy.ndarray): Predicted seed vectors of the training set given opt_alpha; each column is the prediction for one simulation. It can be used to adjust thres_list.
 
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.GCNSI.main import GCNSI
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        gcnsi = GCNSI()
+
+        gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset)
+
+        print("GCNSI:")
+
+        print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
         """
         # Compute Laplacian matrix
         S = csgraph.laplacian(adj, normed=True)
@@ -147,6 +168,32 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres):
         Returns:
 
         - metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC score.
+
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.GCNSI.main import GCNSI
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        gcnsi = GCNSI()
+
+        gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset)
+
+        print("GCNSI:")
+
+        print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
+
+        metric = gcnsi.test(adj, test_dataset, gcnsi_model, alpha, thres)
+
+        print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}")
         """
         # Compute Laplacian matrix
         S = csgraph.laplacian(adj, normed=True)
@@ -171,9 +218,9 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres):
             pred = torch.softmax(pred, dim=1)
             pred = pred[:, 1].squeeze(-1).detach().numpy()
             test_acc += accuracy_score(seed_vec, pred >= thres)
-            test_pr += precision_score(seed_vec, pred >= thres)
-            test_re += recall_score(seed_vec, pred >= thres)
-            test_f1 += f1_score(seed_vec, pred >= thres)
+            test_pr += precision_score(seed_vec, pred >= thres, zero_division = 1)
+            test_re += recall_score(seed_vec, pred >= thres, zero_division = 1)
+            test_f1 += f1_score(seed_vec, pred >= thres, zero_division = 1)
             test_auc += roc_auc_score(seed_vec, pred)
 
         test_acc = test_acc / test_num

diff --git a/GNN/IVGD/main.py b/GNN/IVGD/main.py
@@ -144,7 +144,26 @@ def train_diffusion(self, adj, train_dataset):
 
         Returns:
 
-        - torch.nn.Module: Trained diffusion model.
+        - diffusion_model (torch.nn.Module): Trained diffusion model.
+
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.IVGD.main import IVGD
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        ivgd = IVGD()
+
+        diffusion_model = ivgd.train_diffusion(adj, train_dataset)
+
         """
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         num_node = adj.shape[0]
@@ -184,7 +203,7 @@ def train_diffusion(self, adj, train_dataset):
         print(f"run time per epoch:{result['runtime_perepoch']:.3f} seconds")
         return diffusion_model
 
-    def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =0.001, num_epoch = 10):
+    def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =1e-3, weight_decay = 1e-4, num_epoch = 10):
         """
         Train the IVGD model.
 
@@ -200,6 +219,8 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5,
 
         - lr (float): Learning rate.
 
+        - weight_decay (float): Weight decay (L2 penalty) passed to the Adam optimizer.
+
         - num_epoch (int): Number of epochs for training.
 
         Returns:
@@ -213,16 +234,41 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5,
         - opt_f1 (float): Optimal F1 score.
 
         - pred (numpy.ndarray): Predicted seed vectors of the training set; each column is the prediction for one simulation. It can be used to adjust thres_list.
+
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.IVGD.main import IVGD
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        ivgd = IVGD()
+
+        diffusion_model = ivgd.train_diffusion(adj, train_dataset)
+
+        ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model)
+
+        print("IVGD:")
+
+        print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
+
         """
         criterion = nn.CrossEntropyLoss()
-        train_num = len(train_dataset)
+        #train_num = len(train_dataset)
         num_node = adj.shape[0]
         alpha = 1
         tau = 1
         rho = 1e-3
         lamda = 0
         ivgd = IVGD_model(alpha=alpha, tau=tau, rho=rho)
-        optimizer = optim.Adam(ivgd.parameters(), lr=lr)
+        optimizer = optim.Adam(ivgd.parameters(), lr = lr, weight_decay = weight_decay)
         ivgd.train()
         train_num = len(train_dataset)
         for i,influ_mat in enumerate(train_dataset):
@@ -303,6 +349,34 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres):
         Returns:
 
         - metric (Metric): Object containing test metrics.
+
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.IVGD.main import IVGD
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        ivgd = IVGD()
+
+        diffusion_model = ivgd.train_diffusion(adj, train_dataset)
+
+        ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model)
+
+        print("IVGD:")
+
+        print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
+
+        metric = ivgd.test(test_dataset, diffusion_model, ivgd_model, thres)
+
+        print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}")
         """
         test_num = len(test_dataset)
         test_acc = 0
@@ -332,9 +406,9 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres):
 
             # Compute metrics
             test_acc += accuracy_score(seed_vec, seed_correction >= thres)
-            test_pr += precision_score(seed_vec, seed_correction >= thres)
-            test_re += recall_score(seed_vec, seed_correction >= thres)
-            test_f1 += f1_score(seed_vec, seed_correction >= thres)
+            test_pr += precision_score(seed_vec, seed_correction >= thres, zero_division = 1)
+            test_re += recall_score(seed_vec, seed_correction >= thres, zero_division = 1)
+            test_f1 += f1_score(seed_vec, seed_correction >= thres, zero_division = 1)
             test_auc += roc_auc_score(seed_vec, seed_correction)
 
         # Compute average metrics

diff --git a/GNN/SLVAE/main.py b/GNN/SLVAE/main.py
@@ -8,7 +8,7 @@
 from Evaluation import Metric
 class SLVAE_model(nn.Module):
     """
-        Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules.
+    Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules.
 
     Attributes:
     - vae (nn.Module): Variational Autoencoder module.
@@ -73,6 +73,8 @@ def forward(self, seed_vec, train_mode):
             predictions = self.gnn(seed_vec)
             predictions = self.propagate(predictions)
 
+        predictions = torch.transpose(predictions, 0, 1)
+
         # Return reconstructed seed vector, mean, log variance, and predictions
         return seed_hat, mean, log_var, predictions
 
@@ -148,15 +150,15 @@ class SLVAE:
     """
     Implement the Source Localization Variational Autoencoder (SLVAE) model.
 
-Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020.
+    Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020.
     """
 
     def __init__(self):
         """
         Initialize the SLVAE model.
         """
 
-    def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.001, num_epoch = 50,print_epoch =10):
+    def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr = 1e-3, weight_decay = 1e-4, num_epoch = 50,print_epoch =10):
         """
         Train the SLVAE model.
 
@@ -170,6 +172,8 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
 
         - lr (float): Learning rate.
 
+        - weight_decay (float): Weight decay (L2 penalty) passed to the Adam optimizer.
+
         - num_epoch (int): Number of training epochs.
 
         - print_epoch (int): Interval, in epochs, at which the loss is printed.
@@ -187,6 +191,28 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
         - opt_f1 (float): Optimal F1 score.
 
         - pred (numpy.ndarray): Predicted seed vectors of the training set; each column is the prediction for one simulation. It can be used to adjust thres_list.
+
+        Example:
+
+        from data.utils import load_dataset, diffusion_generation, split_dataset
+
+        from GNN.SLVAE.main import SLVAE
+
+        data_name = 'karate'
+
+        graph = load_dataset(data_name)
+
+        dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1)
+
+        adj, train_dataset, test_dataset =split_dataset(dataset)
+
+        slvae = SLVAE()
+
+        slvae_model, seed_vae_train, thres, auc, f1, pred = slvae.train(adj, train_dataset)
+
+        print("SLVAE:")
+
+        print(f"train auc: {auc:.3f}, train f1: {f1:.3f}")
         """
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         num_node = adj.shape[0]
@@ -198,7 +224,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
         v = torch.FloatTensor(values)
         shape = adj.shape
 
-        adj_matrix = torch.sparse.FloatTensor(i, v, torch.Size(shape)).to_dense()
+        adj_matrix = torch.sparse_coo_tensor(i, v, torch.Size(shape)).to_dense()
         train_num = len(train_dataset)
         vae = VAE().to(device)
         gnn = GNN(adj_matrix=adj_matrix)
@@ -227,7 +253,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
                 loss.backward()
                 optimizer.step()
             average_loss = overall_loss/train_num
-            if epoch % print_epoch ==0:
+            if epoch % print_epoch == 0:
                 print(f"epoch = {epoch}, loss = {average_loss:.3f}")
 
         # Evaluation
@@ -250,7 +276,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
         for seed in seed_infer:
             seed.requires_grad = True
 
-        optimizer = Adam(seed_infer, lr=lr)
+        optimizer = Adam(seed_infer, lr = lr, weight_decay = weight_decay)
 
         for epoch in range(num_epoch):
             overall_loss = 0
@@ -268,8 +294,9 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
 
             average_loss = overall_loss/train_num
 
+
             if epoch % print_epoch ==0:
-                print(f"epoch = {epoch}")
+                print(f"epoch = {epoch}, obj = {average_loss:.4f}")
 
         train_auc = 0
         for i, influ_mat in enumerate(train_dataset):
@@ -287,7 +314,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.
                 seed_vec = influ_mat[:, 0]
                 seed_vec = seed_vec.squeeze(-1).detach().numpy()
                 seed_pred = seed_infer[i].detach().numpy()
-                train_f1 += f1_score(seed_vec, seed_pred >= thres)
+                train_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1)
             train_f1 = train_f1 / train_num
             print(f"thres = {thres:.3f}, train_f1 = {train_f1:.3f}")
             if train_f1 > opt_f1:
@@ -357,11 +384,14 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e
 
                 overall_loss += loss.item()
 
+                average_loss = overall_loss / test_num
+
                 loss.backward()
                 optimizer.step()
+
 
-            if epoch % print_epoch ==0:
-                print(f"epoch = {epoch}")
+            if epoch % print_epoch == 0:
+                print(f"epoch = {epoch}, obj = {average_loss:.4f}")
 
         test_acc = 0
         test_pr = 0
@@ -374,9 +404,9 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e
             seed_vec = seed_vec.squeeze(-1).detach().numpy()
             seed_pred = seed_infer[i].detach().numpy()
             test_acc += accuracy_score(seed_vec, seed_pred >= thres)
-            test_pr += precision_score(seed_vec, seed_pred >= thres)
-            test_re += recall_score(seed_vec, seed_pred >= thres)
-            test_f1 += f1_score(seed_vec, seed_pred >= thres)
+            test_pr += precision_score(seed_vec, seed_pred >= thres, zero_division = 1)
+            test_re += recall_score(seed_vec, seed_pred >= thres, zero_division = 1)
+            test_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1)
             test_auc += roc_auc_score(seed_vec, seed_pred)
 
         test_acc = test_acc / test_num

diff --git a/GNN/SLVAE/model.py b/GNN/SLVAE/model.py
@@ -320,7 +320,7 @@ def forward(self, seed_vec):
                 mat = self.adj_matrix.T @ attr_mat[-1]
                 attr_mat = torch.cat((attr_mat, mat.unsqueeze(0)), 0)
 
-        layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.T)))
+        layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.permute(*torch.arange(attr_mat.ndim - 1, -1, -1)))))
         for fc in self.fcs[1:-1]:
             layer_inner = self.act_fn(fc(layer_inner))
         res = torch.sigmoid(self.fcs[-1](self.dropout(layer_inner)))