In [1]:
import random
from collections import defaultdict

import mlflow
import networkx as nx
import numpy as np
import torch
#from torch_geometric.utils import from_networkx
import dgl

from tqdm import tqdm as tq

import json
import os
import tempfile
import time
from collections import defaultdict
import random
# Benchmark 
import mlflow
import torch.nn.functional as F
from tqdm import tqdm as tq

#from explain_methods import *
#from models import Net1
#from .models_dgl import Net1

Using backend: pytorch


ImportError: attempted relative import with no known parent package

In [5]:
import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU
#from torch_geometric.nn import GNNExplainer, GINConv, MessagePassing, GCNConv, GraphConv
from dgl.nn import GraphConv#GCNConv

class Net1(torch.nn.Module):
    def __init__(self, num_node_features, num_classes, num_layers, concat_features, conv_type):
        super(Net1, self).__init__()
        dim = 32
        self.convs = torch.nn.ModuleList()
        if conv_type == 'GraphConv':#'GCNConv':
            conv_class = GraphConv
            #kwargs = {'add_self_loops': False}
        #elif conv_type == 'GraphConv':
        #    conv_class = GraphConv
        #    kwargs = {}
        else:
            raise RuntimeError(f"conv_type {conv_type} not supported")

        self.convs.append(conv_class(num_node_features, dim, allow_zero_in_degree = True))#, **kwargs))
        for i in range(num_layers - 1):
            self.convs.append(conv_class(dim, dim, allow_zero_in_degree = True))#, **kwargs))
        self.concat_features = concat_features
        if concat_features:
            self.fc = Linear(dim * num_layers + num_node_features, num_classes)
        else:
            self.fc = Linear(dim, num_classes)

    def forward(self, g, x, weight = None, edge_weight=None):
        xs = [x]
        for conv in self.convs:
            x = conv(g, x, weight, edge_weight)
            x = F.relu(x)
            xs.append(x)
        if self.concat_features:
            x = torch.cat(xs, dim=1)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


In [3]:

class Benchmark(object):
    '''
    三个Benchmark的父类
    
    需要子类继承：create_dataset和evaluate_explanation这两个方法
    '''
    NUM_GRAPHS = 2
    TEST_RATIO = 0.5
    PGMEXPLAINER_SUBSAMPLE_PER_GRAPH = 20
    METHODS = ['pagerank', 'pgmexplainer', 'occlusion', 'distance', 'gradXact', 'random', 'sa_node',
               'ig_node', 'sa', 'ig', 'gnnexplainer',
               'subgraphx']
    LR = 0.0003
    EPOCHS = 400
    WEIGHT_DECAY = 0

    def __init__(self, sample_count, num_layers, concat_features, conv_type):
        arguments = {
            'sample_count': sample_count,
            'num_layers': num_layers,
            'concat_features': concat_features,
            'conv_type': conv_type,
            'num_graphs': self.NUM_GRAPHS,
            'test_ratio': self.TEST_RATIO,
        }
        self.sample_count = sample_count
        self.num_layers = num_layers
        self.concat_features = concat_features
        self.conv_type = conv_type
        mlflow.log_params(arguments)
        mlflow.log_param('PGMEXPLAINER_SUBSAMPLE_PER_GRAPH', self.PGMEXPLAINER_SUBSAMPLE_PER_GRAPH)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def create_dataset(self):
        raise NotImplementedError

    def evaluate_explanation(self, explain_function, model, test_dataset, explain_name):
        raise NotImplementedError

    def subsample_nodes(self, explain_function, nodes):
        if explain_function.explain_function != explain_pgmexplainer:
            return nodes
        return random.sample(nodes, self.PGMEXPLAINER_SUBSAMPLE_PER_GRAPH)

    @staticmethod
    def aggregate_directions(edge_mask, edge_index):
        edge_values = defaultdict(float)
        for x in range(len(edge_mask)):
            u, v = edge_index[:, x]
            u, v = u.item(), v.item()
            if u > v:
                u, v = v, u
            edge_values[(u, v)] += edge_mask[x]
        return edge_values

    def train(self, model, optimizer, train_loader):
        model.train()
        loss_all = 0
        for data in train_loader:
            data = data.to(self.device)
            optimizer.zero_grad()
            output = model(data, data.ndata['x'])
            loss = F.nll_loss(output, data.ndata['y'])
            loss.backward()
            loss_all += loss.item()
            optimizer.step()
        return loss_all / len(train_loader)

    def test(self, model, loader):
        model.eval()

        correct = 0
        total = 0
        for data in loader:
            data = data.to(self.device)
            output = model(data, data.ndata['x'])
            pred = output.max(dim=1)[1]
            correct += pred.eq(data.ndata['y']).sum().item()
            total += len(data.ndata['y'])
        return correct / total

    def train_and_test(self, model, train_loader, test_loader):
        optimizer = torch.optim.Adam(model.parameters(), lr=self.LR, weight_decay=self.WEIGHT_DECAY)
        mlflow.log_param('weight_decay', self.WEIGHT_DECAY)
        mlflow.log_param('lr', self.LR)
        mlflow.log_param('epochs', self.EPOCHS)
        pbar = tq(range(self.EPOCHS))
        for epoch in pbar:
            train_loss = self.train(model, optimizer, train_loader)
            train_acc = self.test(model, train_loader)
            test_acc = self.test(model, test_loader)
            pbar.set_postfix(train_loss=train_loss, train_acc=train_acc, test_acc=test_acc)
        return train_acc, test_acc

    def is_trained_model_valid(self, test_acc):
        return True

    def run(self):
        print(f"Using device {self.device}")
        benchmark_name = self.__class__.__name__
        all_explanations = defaultdict(list)
        all_runtimes = defaultdict(list)
        for experiment_i in tq(range(self.sample_count)):
            dataset = [self.create_dataset() for i in range(self.NUM_GRAPHS)]
            split_point = int(len(dataset) * self.TEST_RATIO)
            test_dataset = dataset[:split_point]
            train_dataset = dataset[split_point:]
            data = dataset[0]
            model = Net1(data.num_node_features, data.num_classes, self.num_layers, self.concat_features,
                         self.conv_type).to(
                self.device)
            train_acc, test_acc = self.train_and_test(model, train_dataset, test_dataset)
            if not self.is_trained_model_valid(test_acc):
                print('Model accuracy was not valid, ignoring this experiment')
                continue
            model.eval()
            metrics = {
                'train_acc': train_acc,
                'test_acc': test_acc,
            }
            mlflow.log_metrics(metrics, step=experiment_i)

            for explain_name in self.METHODS:
                explain_function = eval('explain_' + explain_name)
                duration_samples = []

                def time_wrapper(*args, **kwargs):
                    start_time = time.time()
                    result = explain_function(*args, **kwargs)
                    end_time = time.time()
                    duration_seconds = end_time - start_time
                    duration_samples.append(duration_seconds)
                    return result

                time_wrapper.explain_function = explain_function
                accs = self.evaluate_explanation(time_wrapper, model, test_dataset, explain_name)
                print(f'Benchmark:{benchmark_name} Run #{experiment_i + 1}, Explain Method: {explain_name}, Accuracy: {np.mean(accs)}')
                all_explanations[explain_name].append(list(accs))
                all_runtimes[explain_name].extend(duration_samples)
                metrics = {
                    f'explain_{explain_name}_acc': np.mean(accs),
                    f'time_{explain_name}_s_avg': np.mean(duration_samples),
                }
                with tempfile.TemporaryDirectory() as tmpdir:
                    file_path = os.path.join(tmpdir, 'accuracies.json')
                    json.dump(all_explanations, open(file_path, 'w'), indent=2)
                    mlflow.log_artifact(file_path)
                mlflow.log_metrics(metrics, step=experiment_i)
            print(f'Benchmark:{benchmark_name} Run #{experiment_i + 1} finished. Average Explanation Accuracies for each method:')
            accuracies_summary = {}
            for name, run_accs in all_explanations.items():
                run_accs = [np.mean(single_run_acc) for single_run_acc in run_accs]
                accuracies_summary[name] = {'avg': np.mean(run_accs), 'std': np.std(run_accs), 'count': len(run_accs)}
                print(f'{name} : avg:{np.mean(run_accs)} std:{np.std(run_accs)}')
            runtime_summary = {}
            for name, runtimes in all_runtimes.items():
                runtime_summary[name] = {'avg': np.mean(runtimes), 'std': np.std(runtimes)}
            with tempfile.TemporaryDirectory() as tmpdir:
                file_path = os.path.join(tmpdir, 'summary.json')
                summary = {'accuracies': accuracies_summary, 'runtime': runtime_summary}
                json.dump(summary, open(file_path, 'w'), indent=2)
                mlflow.log_artifact(file_path)
#Infection
class Infection(Benchmark):
    NUM_GRAPHS = 10
    TEST_RATIO = 0.4
    LR = 0.005

    @staticmethod
    def get_accuracy(correct_ids, edge_mask, edge_index):
        correct_count = 0
        correct_edges = list(zip(correct_ids, correct_ids[1:]))

        for x in np.argsort(-edge_mask)[:len(correct_ids)]:
            u, v = edge_index[:, x]
            u, v = u.item(), v.item()
            if (u, v) in correct_edges:
                correct_count += 1
        return correct_count / len(correct_edges)

    @staticmethod
    def get_accuracy_undirected(correct_ids, edge_values):
        correct_count = 0
        correct_edges = list(zip(correct_ids, correct_ids[1:]))

        top_edges = list(sorted([(-value, edge) for edge, value in edge_values.items()]))[:len(correct_ids)]
        for _, (u, v) in top_edges:
            if (u, v) in correct_edges or (v, u) in correct_edges:
                correct_count += 1
        return correct_count / len(correct_edges)

    def create_dataset(self):
        max_dist = self.num_layers  # anything larger than max_dist has a far away label
        g = nx.erdos_renyi_graph(250, 0.004, directed=True)#1000
        N = len(g.nodes())
        infected_nodes = random.sample(g.nodes(), 50)
        g.add_node('X')  # dummy node for easier computation, will be removed in the end
        for u in infected_nodes:
            g.add_edge('X', u)
        shortest_path_length = nx.single_source_shortest_path_length(g, 'X')
        unique_solution_nodes = []
        unique_solution_explanations = []
        labels = []
        features = np.zeros((N, 2))
        for i in range(N):
            if i == 'X':
                continue
            length = shortest_path_length.get(i, 100) - 1  # 100 is inf distance
            labels.append(min(max_dist + 1, length))
            col = 0 if i in infected_nodes else 1
            features[i, col] = 1
            if 0 < length <= max_dist:
                path_iterator = iter(nx.all_shortest_paths(g, 'X', i))
                unique_shortest_path = next(path_iterator)
                if next(path_iterator, 0) != 0:
                    continue
                unique_shortest_path.pop(0)  # pop 'X' node
                if len(unique_shortest_path) == 0:
                    continue
                unique_solution_explanations.append(unique_shortest_path)
                unique_solution_nodes.append(i)
        g.remove_node('X')
        data = dgl.from_networkx(g)
        data.ndata['x'] = torch.tensor(features, dtype=torch.float)
        #data.x = torch.tensor(features, dtype=torch.float)
        data.ndata['y'] = torch.tensor(labels)
        #data.y = torch.tensor(labels)
        data.unique_solution_nodes = unique_solution_nodes
        data.unique_solution_explanations = unique_solution_explanations
        data.num_classes = 1 + max_dist + 1
        data.num_node_features = 2
        print('created one')
        return data

    def is_trained_model_valid(self, test_acc):
        return test_acc > 0.999

    def evaluate_explanation(self, explain_function, model, test_dataset, explain_name):
        accs = []
        misclassify_count = 0
        for data in test_dataset:
            _, pred = model(data, data.ndata['x']).max(dim=1)#data.x data.edge_index
            nodes_to_test = list(zip(data.unique_solution_nodes, data.unique_solution_explanations))
            nodes_to_test = self.subsample_nodes(explain_function, nodes_to_test)
            pbar = tq(nodes_to_test, disable=False)
            tested_nodes = 0
            for node_idx, correct_ids in pbar:
                if pred[node_idx] != data.y[node_idx]:
                    misclassify_count += 1
                    continue
                tested_nodes += 1
                edge_mask = explain_function(model, node_idx, data.ndata['x'], data.edges(), data.ndata['y'][node_idx].item())
                explain_acc = self.get_accuracy(correct_ids, edge_mask, data.edges())
                accs.append(explain_acc)
                pbar.set_postfix(acc=np.mean(accs))
            mlflow.log_metric('tested_nodes_per_graph', tested_nodes)
        return accs

In [6]:
A = Infection(10,4,True,'GraphConv')
A.run()

  0%|          | 0/10 [00:00<?, ?it/s]

Using device cuda
created one
created one
created one
created one
created one
created one
created one
created one
created one
created one


100%|██████████| 400/400 [00:36<00:00, 10.97it/s, test_acc=0.999, train_acc=1, train_loss=0.00372]
 10%|█         | 1/10 [00:36<05:30, 36.67s/it]

Model accuracy was not valid, ignoring this experiment
created one
created one
created one
created one
created one
created one
created one
created one
created one




created one


100%|██████████| 400/400 [00:36<00:00, 10.93it/s, test_acc=0.999, train_acc=1, train_loss=0.00362]
 20%|██        | 2/10 [01:13<04:54, 36.76s/it]

Model accuracy was not valid, ignoring this experiment
created one
created one
created one
created one
created one
created one
created one
created one
created one
created one


100%|██████████| 400/400 [00:36<00:00, 10.96it/s, test_acc=1, train_acc=1, train_loss=0.00368]
 20%|██        | 2/10 [01:50<07:20, 55.09s/it]


NameError: name 'explain_pagerank' is not defined

In [1]:
import dgl

Using backend: pytorch


In [11]:
g = dgl.graph(([0,1],[1,2]))
g.xxx = '123'
netg = g.to_networkx()
netg.edges()

OutMultiEdgeDataView([(0, 1), (1, 2)])

In [12]:
g.edges()

(tensor([0, 1]), tensor([1, 2]))

In [19]:
a,b  = list(enumerate(zip(*[g.edges()[0].cpu().numpy(),g.edges()[1].cpu().numpy()])))[0]
c = dict()
c[b] = a
c

{(0, 1): 0}

In [24]:
import torch 
torch.combinations(g.edges()[0],g.edges()[1])


TypeError: combinations(): argument 'r' (position 2) must be int, not Tensor