From 1e2b5a8461c103d1ceb8bc24993cbddeeaf65ef9 Mon Sep 17 00:00:00 2001 From: Junxiang Wang Date: Sun, 31 Mar 2024 15:58:17 -0400 Subject: [PATCH] documentation --- GNN/GCNSI/main.py | 53 +++++++++++++- GNN/IVGD/main.py | 88 ++++++++++++++++++++++-- GNN/SLVAE/main.py | 56 +++++++++++---- GNN/SLVAE/model.py | 2 +- Prescribed.py | 168 +++++++++++++++++++++++++++++++++++++++++---- all.py | 2 +- data/utils.py | 2 +- 7 files changed, 333 insertions(+), 38 deletions(-) diff --git a/GNN/GCNSI/main.py b/GNN/GCNSI/main.py index 42b925b..b92b094 100644 --- a/GNN/GCNSI/main.py +++ b/GNN/GCNSI/main.py @@ -53,6 +53,27 @@ def train(self, adj, train_dataset, alpha_list=[0.001,0.01, 0.1], thres_list=[0. - opt_pred (numpy.ndarray): Predicted seed vector of the training set given opt_alpha, every column is the prediction of every simulation. It is used to adjust thres_list. + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.GCNSI.main import GCNSI + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + gcnsi = GCNSI() + + gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset) + + print("GCNSI:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") """ # Compute Laplacian matrix S = csgraph.laplacian(adj, normed=True) @@ -147,6 +168,32 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres): Returns: - metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC score. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.GCNSI.main import GCNSI + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + gcnsi = GCNSI() + + gcnsi_model, alpha, thres, auc, f1, pred =gcnsi.train(adj, train_dataset) + + print("GCNSI:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + + metric = gcnsi.test(adj, test_dataset, gcnsi_model, alpha, thres) + + print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}") """ # Compute Laplacian matrix S = csgraph.laplacian(adj, normed=True) @@ -171,9 +218,9 @@ def test(self, adj, test_dataset, gcnsi_model, alpha, thres): pred = torch.softmax(pred, dim=1) pred = pred[:, 1].squeeze(-1).detach().numpy() test_acc += accuracy_score(seed_vec, pred >= thres) - test_pr += precision_score(seed_vec, pred >= thres) - test_re += recall_score(seed_vec, pred >= thres) - test_f1 += f1_score(seed_vec, pred >= thres) + test_pr += precision_score(seed_vec, pred >= thres, zero_division = 1) + test_re += recall_score(seed_vec, pred >= thres, zero_division = 1) + test_f1 += f1_score(seed_vec, pred >= thres, zero_division = 1) test_auc += roc_auc_score(seed_vec, pred) test_acc = test_acc / test_num diff --git a/GNN/IVGD/main.py b/GNN/IVGD/main.py index 37bfd20..af0f7e8 100644 --- a/GNN/IVGD/main.py +++ b/GNN/IVGD/main.py @@ -144,7 +144,26 @@ def train_diffusion(self, adj, train_dataset): Returns: - - torch.nn.Module: Trained diffusion model. + - diffusion_model (torch.nn.Module): Trained diffusion model. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.IVGD.main import IVGD + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + ivgd = IVGD() + + diffusion_model = ivgd.train_diffusion(adj, train_dataset) + """ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') num_node = adj.shape[0] @@ -184,7 +203,7 @@ def train_diffusion(self, adj, train_dataset): print(f"run time per epoch:{result['runtime_perepoch']:.3f} seconds") return diffusion_model - def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =0.001, num_epoch = 10): + def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9],lr =1e-3, weight_decay = 1e-4, num_epoch = 10): """ Train the IVGD model. @@ -200,6 +219,8 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, - lr (float): Learning rate. + - weight_decay (float): Weight decay. + - num_epoch (int): Number of epochs for training. Returns: @@ -213,16 +234,41 @@ def train(self, adj, train_dataset, diffusion_model, thres_list=[0.1, 0.3, 0.5, - opt_f1 (float): Optimal F1 score. - pred (numpy.ndarray): Predicted seed vector of the training set, every column is the prediction of every simulation. It is used to adjust thres_list. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.IVGD.main import IVGD + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + ivgd = IVGD() + + diffusion_model = ivgd.train_diffusion(adj, train_dataset) + + ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model) + + print("IVGD:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + """ criterion = nn.CrossEntropyLoss() - train_num = len(train_dataset) + #train_num = len(train_dataset) num_node = adj.shape[0] alpha = 1 tau = 1 rho = 1e-3 lamda = 0 ivgd = IVGD_model(alpha=alpha, tau=tau, rho=rho) - optimizer = optim.Adam(ivgd.parameters(), lr=lr) + optimizer = optim.Adam(ivgd.parameters(), lr = lr, weight_decay = weight_decay) ivgd.train() train_num = len(train_dataset) for i,influ_mat in enumerate(train_dataset): @@ -303,6 +349,34 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres): Returns: - metric (Metric): Object containing test metrics. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.IVGD.main import IVGD + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + ivgd = IVGD() + + diffusion_model = ivgd.train_diffusion(adj, train_dataset) + + ivgd_model, thres, auc, f1, pred =ivgd.train(adj, train_dataset, diffusion_model) + + print("IVGD:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + + metric = ivgd.test(test_dataset, diffusion_model, ivgd_model, thres) + + print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}") """ test_num = len(test_dataset) test_acc = 0 @@ -332,9 +406,9 @@ def test(self, test_dataset, diffusion_model, IVGD_model, thres): # Compute metrics test_acc += accuracy_score(seed_vec, seed_correction >= thres) - test_pr += precision_score(seed_vec, seed_correction >= thres) - test_re += recall_score(seed_vec, seed_correction >= thres) - test_f1 += f1_score(seed_vec, seed_correction >= thres) + test_pr += precision_score(seed_vec, seed_correction >= thres, zero_division = 1) + test_re += recall_score(seed_vec, seed_correction >= thres, zero_division = 1) + test_f1 += f1_score(seed_vec, seed_correction >= thres, zero_division = 1) test_auc += roc_auc_score(seed_vec, seed_correction) # Compute average metrics diff --git a/GNN/SLVAE/main.py b/GNN/SLVAE/main.py index 7e093db..c89860e 100644 --- a/GNN/SLVAE/main.py +++ b/GNN/SLVAE/main.py @@ -8,7 +8,7 @@ from Evaluation import Metric class SLVAE_model(nn.Module): """ - Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules. + Source Localization Variational Autoencoder (SLVAE) model combining VAE, GNN, and propagation modules. 
Attributes: - vae (nn.Module): Variational Autoencoder module. @@ -73,6 +73,8 @@ def forward(self, seed_vec, train_mode): predictions = self.gnn(seed_vec) predictions = self.propagate(predictions) + predictions = torch.transpose(predictions, 0, 1) + # Return reconstructed seed vector, mean, log variance, and predictions return seed_hat, mean, log_var, predictions @@ -148,7 +150,7 @@ class SLVAE: """ Implement the Source Localization Variational Autoencoder (SLVAE) model. -Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020. + Ling C, Jiang J, Wang J, et al. Source localization of graph diffusion via variational autoencoders for graph inverse problems[C]//Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. 2022: 1010-1020. """ def __init__(self): @@ -156,7 +158,7 @@ def __init__(self): Initialize the SLVAE model. """ - def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0.001, num_epoch = 50,print_epoch =10): + def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr = 1e-3, weight_decay = 1e-4, num_epoch = 50,print_epoch =10): """ Train the SLVAE model. @@ -170,6 +172,8 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. - lr (float): Learning rate. + - weight_decay (float): Weight decay. + - num_epoch (int): Number of training epochs. - print_epoch (int): Number of epochs every time to print loss. @@ -187,6 +191,28 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. - opt_f1 (float): Optimal F1 score. - pred (numpy.ndarray): Predicted seed vector of the training set, every column is the prediction of every simulation. It is used to adjust thres_list. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from GNN.SLVAE.main import SLVAE + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + slave = SLVAE() + + slvae_model, seed_vae_train, thres, auc, f1, pred = slave.train(adj, train_dataset) + + print("SLVAE:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") """ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') num_node = adj.shape[0] @@ -198,7 +224,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. v = torch.FloatTensor(values) shape = adj.shape - adj_matrix = torch.sparse.FloatTensor(i, v, torch.Size(shape)).to_dense() + adj_matrix = torch.sparse_coo_tensor(i, v, torch.Size(shape)).to_dense() train_num = len(train_dataset) vae = VAE().to(device) gnn = GNN(adj_matrix=adj_matrix) @@ -227,7 +253,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. loss.backward() optimizer.step() average_loss = overall_loss/train_num - if epoch % print_epoch ==0: + if epoch % print_epoch == 0: print(f"epoch = {epoch}, loss = {average_loss:.3f}") # Evaluation @@ -250,7 +276,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. for seed in seed_infer: seed.requires_grad = True - optimizer = Adam(seed_infer, lr=lr) + optimizer = Adam(seed_infer, lr = lr, weight_decay = weight_decay) for epoch in range(num_epoch): overall_loss = 0 @@ -268,8 +294,9 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. 
average_loss = overall_loss/train_num + if epoch % print_epoch ==0: - print(f"epoch = {epoch}") + print(f"epoch = {epoch}, obj = {average_loss:.4f}") train_auc = 0 for i, influ_mat in enumerate(train_dataset): @@ -287,7 +314,7 @@ def train(self, adj, train_dataset, thres_list=[0.1, 0.3, 0.5, 0.7, 0.9], lr =0. seed_vec = influ_mat[:, 0] seed_vec = seed_vec.squeeze(-1).detach().numpy() seed_pred = seed_infer[i].detach().numpy() - train_f1 += f1_score(seed_vec, seed_pred >= thres) + train_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1) train_f1 = train_f1 / train_num print(f"thres = {thres:.3f}, train_f1 = {train_f1:.3f}") if train_f1 > opt_f1: @@ -357,11 +384,14 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e overall_loss += loss.item() + average_loss = overall_loss / test_num + loss.backward() optimizer.step() + - if epoch % print_epoch ==0: - print(f"epoch = {epoch}") + if epoch % print_epoch == 0: + print(f"epoch = {epoch}, obj = {average_loss:.4f}") test_acc = 0 test_pr = 0 @@ -374,9 +404,9 @@ def infer(self, test_dataset, slvae_model, seed_vae_train, thres, lr=0.001,num_e seed_vec = seed_vec.squeeze(-1).detach().numpy() seed_pred = seed_infer[i].detach().numpy() test_acc += accuracy_score(seed_vec, seed_pred >= thres) - test_pr += precision_score(seed_vec, seed_pred >= thres) - test_re += recall_score(seed_vec, seed_pred >= thres) - test_f1 += f1_score(seed_vec, seed_pred >= thres) + test_pr += precision_score(seed_vec, seed_pred >= thres, zero_division = 1) + test_re += recall_score(seed_vec, seed_pred >= thres, zero_division = 1) + test_f1 += f1_score(seed_vec, seed_pred >= thres, zero_division = 1) test_auc += roc_auc_score(seed_vec, seed_pred) test_acc = test_acc / test_num diff --git a/GNN/SLVAE/model.py b/GNN/SLVAE/model.py index c37b8b3..1f1159f 100644 --- a/GNN/SLVAE/model.py +++ b/GNN/SLVAE/model.py @@ -320,7 +320,7 @@ def forward(self, seed_vec): mat = self.adj_matrix.T @ attr_mat[-1] attr_mat = 
torch.cat((attr_mat, mat.unsqueeze(0)), 0) - layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.T))) + layer_inner = self.act_fn(self.fcs[0](self.dropout(attr_mat.permute(*torch.arange(attr_mat.ndim - 1, -1, -1))))) for fc in self.fcs[1:-1]: layer_inner = self.act_fn(fc(layer_inner)) res = torch.sigmoid(self.fcs[-1](self.dropout(layer_inner))) diff --git a/Prescribed.py b/Prescribed.py index e9ad7b1..244cfa3 100644 --- a/Prescribed.py +++ b/Prescribed.py @@ -64,6 +64,28 @@ def train(self, adj, train_dataset, alpha_list=[0.001, 0.01, 0.1], thres_list=[0 - opt_f1 (float): Optimal F1 score value. - opt_pred (numpy.ndarray): Prediction of training seed vector given opt_alpha, every column is the prediction of every simulation. It is used to adjust thres_list. + + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import LPSI + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + lpsi = LPSI() + + alpha, thres, auc, f1, pred =lpsi.train(adj, train_dataset) + + print("LPSI:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") """ laplacian = csgraph.laplacian(adj, normed=True) laplacian = np.array(coo_matrix.todense(laplacian)) @@ -97,7 +119,7 @@ def train(self, adj, train_dataset, alpha_list=[0.001, 0.01, 0.1], thres_list=[0 print(f"thres = {thres:.3f}") train_f1 = 0 for i in range(train_num): - train_f1 += f1_score(seed_all[:,i], opt_pred[:,i] >= thres) + train_f1 += f1_score(seed_all[:,i], opt_pred[:,i] >= thres, zero_division = 1) train_f1 = train_f1 / train_num print(f"thres = {thres:.3f}, train_f1 = {train_f1:.3f}") if train_f1 > opt_f1: @@ -122,6 +144,32 @@ def test(self, adj, test_dataset, alpha, thres): Returns: - metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import LPSI + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset = split_dataset(dataset) + + lpsi = LPSI() + + alpha, thres, auc, f1, pred = lpsi.train(adj, train_dataset) + + print("LPSI:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + + metric=lpsi.test(adj, test_dataset, alpha, thres) + + print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}") """ laplacian = csgraph.laplacian(adj, normed=True) laplacian = np.array(coo_matrix.todense(laplacian)) @@ -137,9 +185,9 @@ def test(self, adj, test_dataset, alpha, thres): influ_vec = influ_mat[:, -1] x = self.predict(laplacian, num_node, alpha, influ_vec) test_acc += accuracy_score(seed_vec, x >= thres) - test_pr += precision_score(seed_vec, x >= thres) - test_re += recall_score(seed_vec, x >= thres) - test_f1 += f1_score(seed_vec, x >= thres) + test_pr += precision_score(seed_vec, x >= thres, zero_division = 1) + test_re += recall_score(seed_vec, x >= thres, zero_division = 1) + test_f1 += f1_score(seed_vec, x >= thres, zero_division = 1) test_auc += roc_auc_score(seed_vec, x) test_acc = test_acc / test_num @@ -218,6 +266,28 @@ def train(self, adj, train_dataset, k_list=[5, 10, 50, 100]): - opt_auc (float): Optimal Area Under the Curve (AUC) value. - train_f1 (float): Training F1 score value. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import NetSleuth + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + netSleuth = NetSleuth() + + k, auc, f1=netSleuth.train(adj, train_dataset) + + print("NetSleuth:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") """ # Y should be no more than number of nodes num_node = adj.shape[0] @@ -247,7 +317,7 @@ def train(self, adj, train_dataset, k_list=[5, 10, 50, 100]): seed_vec = influ_mat[:, 0] influ_vec = influ_mat[:, -1] x = self.predict(G, opt_k, influ_vec) - train_f1 += f1_score(seed_vec, x) + train_f1 += f1_score(seed_vec, x, zero_division = 1) train_f1 = train_f1 / train_num return opt_k, opt_auc, train_f1 @@ -268,6 +338,32 @@ def test(self, adj, test_dataset, k): Returns: - metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import NetSleuth + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + netSleuth = NetSleuth() + + k, auc, f1=netSleuth.train(adj, train_dataset) + + print("NetSleuth:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + + metric = netSleuth.test(adj, test_dataset, k) + + print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}") """ G = nx.from_numpy_array(adj) test_num = len(test_dataset) @@ -281,9 +377,9 @@ def test(self, adj, test_dataset, k): influ_vec = influ_mat[:, -1] x = self.predict(G, k, influ_vec) test_acc += accuracy_score(seed_vec, x) - test_pr += precision_score(seed_vec, x) - test_re += recall_score(seed_vec, x) - test_f1 += f1_score(seed_vec, x) + test_pr += precision_score(seed_vec, x, zero_division = 1) + test_re += recall_score(seed_vec, x, zero_division = 1) + test_f1 += f1_score(seed_vec, x, zero_division = 1) test_auc += roc_auc_score(seed_vec, x) test_acc = test_acc / test_num @@ -432,6 +528,28 @@ def train(self, adj, train_dataset, Y_list=[5, 10, 20, 50]): - opt_auc (float): Optimal Area Under the Curve (AUC) value. - train_f1 (float): Training F1 score value. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import OJC + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + ojc = OJC() + + Y, auc, f1 =ojc.train(adj, train_dataset) + + print("OJC:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") """ # Y should be no more than number of nodes num_node = adj.shape[0] @@ -463,7 +581,7 @@ def train(self, adj, train_dataset, Y_list=[5, 10, 20, 50]): num_source = len(influ_vec[influ_vec == 1]) I = (influ_vec == 1).nonzero()[0].tolist() x = self.predict(G, opt_Y, I, influ_vec, num_source) - train_f1 += f1_score(seed_vec, x) + train_f1 += f1_score(seed_vec, x, zero_division = 1) train_f1 = train_f1 / train_num return opt_Y, opt_auc, train_f1 @@ -483,6 +601,32 @@ def test(self, adj, test_dataset, Y): Returns: - metric (Metric): Evaluation metric containing accuracy, precision, recall, F1 score, and AUC. 
+ + Example: + + from data.utils import load_dataset, diffusion_generation, split_dataset + + from Prescribed import OJC + + data_name = 'karate' + + graph = load_dataset(data_name) + + dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) + + adj, train_dataset, test_dataset =split_dataset(dataset) + + ojc = OJC() + + Y, auc, f1 =ojc.train(adj, train_dataset) + + print("OJC:") + + print(f"train auc: {auc:.3f}, train f1: {f1:.3f}") + + metric=ojc.test(adj, test_dataset, Y) + + print(f"test acc: {metric.acc:.3f}, test pr: {metric.pr:.3f}, test re: {metric.re:.3f}, test f1: {metric.f1:.3f}, test auc: {metric.auc:.3f}") """ G = nx.from_scipy_sparse_array(adj) test_num = len(test_dataset) @@ -498,9 +642,9 @@ def test(self, adj, test_dataset, Y): I = (influ_vec == 1).nonzero()[0].tolist() x = self.predict(G, Y, I, influ_vec, num_source) test_acc += accuracy_score(seed_vec, x) - test_pr += precision_score(seed_vec, x) - test_re += recall_score(seed_vec, x) - test_f1 += f1_score(seed_vec, x) + test_pr += precision_score(seed_vec, x, zero_division = 1) + test_re += recall_score(seed_vec, x, zero_division = 1) + test_f1 += f1_score(seed_vec, x, zero_division = 1) test_auc += roc_auc_score(seed_vec, x) test_acc = test_acc / test_num diff --git a/all.py b/all.py index efd936a..59b3125 100644 --- a/all.py +++ b/all.py @@ -3,7 +3,7 @@ from GNN.GCNSI.main import GCNSI from GNN.IVGD.main import IVGD from GNN.SLVAE.main import SLVAE -data_name = 'karate' # 'karate', 'dolphins', 'jazz', 'netscience', 'cora_ml', 'power_grid', , 'meme8000', 'digg16000' +data_name = 'dolphins' # 'karate', 'dolphins', 'jazz', 'netscience', 'cora_ml', 'power_grid', , 'meme8000', 'digg16000' graph = load_dataset(data_name) if data_name not in ['meme8000', 'digg16000']: dataset = diffusion_generation(graph=graph, infect_prob=0.3, diff_type='IC', sim_num=100, seed_ratio=0.1) diff --git a/data/utils.py b/data/utils.py index c8cf90e..7b01099 100644 --- 
a/data/utils.py +++ b/data/utils.py @@ -13,7 +13,7 @@ def load_dataset(dataset, data_dir='data'): Args: - - dataset (str): The name of the dataset file. + - dataset (str): The name of the dataset file; one of 'karate', 'dolphins', 'jazz', 'netscience', 'cora_ml', 'power_grid', 'meme8000', 'digg16000'. - data_dir (str): The directory where the dataset files are stored. Default is 'data'.