In [None]:
from dataset.voc_superpixels import VOCSuperpixels
from dataset.coco_superpixels import COCOSuperpixels

import torch_geometric
import torch
import networkx as nx
from tqdm import tqdm
import numpy as np

In [None]:
def join_dataset_splits(datasets):
    """Merge the train, val and test splits into a single dataset object.

    The first dataset in the list is reused as the container: it is modified
    in place to hold the samples of all three splits and is then returned.

    Args:
        datasets: list of exactly 3 PyG datasets, ordered train, val, test

    Returns:
        the first dataset, now holding every sample, with a `split_idxs`
        attribute listing the sample indices that belong to each split
    """
    assert len(datasets) == 3, "Expecting train, val, test datasets"

    merged = datasets[0]

    # Read the split sizes before the container is touched below.
    sizes = [len(ds) for ds in datasets]
    data_list = [ds.get(i) for ds, size in zip(datasets, sizes)
                 for i in range(size)]

    # Drop any index subset and re-collate so the container spans all samples.
    merged._indices = None
    merged._data_list = data_list
    merged.data, merged.slices = merged.collate(data_list)

    # Consecutive index ranges: train first, then val, then test.
    split_idxs = []
    start = 0
    for size in sizes:
        stop = start + size
        split_idxs.append(list(range(start, stop)))
        start = stop
    merged.split_idxs = split_idxs

    return merged

In [None]:
def get_stats(voc_dataset):
    """Print summary statistics over every graph in a dataset.

    For each graph this accumulates the node and edge counts and the per-node
    degree (counted from the second row of `edge_index`), then converts the
    graph to an undirected NetworkX graph to measure its average shortest
    path length and its diameter. Totals, means and standard deviations are
    printed; nothing is returned.

    Args:
        voc_dataset: sized iterable of graphs exposing `num_nodes`,
            `num_edges` and `edge_index`. Nothing in the body is specific to
            one dataset, despite the parameter name.

    Returns:
        None

    Raises:
        Whatever `nx.average_shortest_path_length` / `nx.diameter` raise for
        a graph they cannot handle (per the NetworkX documentation, e.g. a
        graph that is not connected).
    """
    total_nodes, total_edges = 0, 0
    # Per-graph degree tensors are collected and concatenated once after the
    # loop; concatenating inside the loop re-copies everything accumulated so
    # far on every iteration.
    node_degs = []
    all_avg_shortest_paths, all_diameters = [], []
    for g in tqdm(voc_dataset):
        total_nodes += g.num_nodes
        total_edges += g.num_edges
        idx = g.edge_index[1]
        node_degs.append(
            torch_geometric.utils.degree(idx, g.num_nodes, dtype=torch.long))

        # !!! NOTE
        # For shortest path and diameter computations, we convert the digraph to undirected
        g_nx = torch_geometric.utils.to_networkx(g).to_undirected()
        all_avg_shortest_paths.append(nx.average_shortest_path_length(g_nx))
        all_diameters.append(nx.diameter(g_nx))

    num_graphs = len(voc_dataset)
    print("total graphs: ", num_graphs)
    print("total nodes: ", total_nodes)
    print("total edges: ", total_edges)
    if not node_degs:
        # No graph was visited: every average below would divide by zero or
        # reduce over an empty sequence.
        print("no graphs to summarise; averages are undefined")
        return

    # torch.mean needs a floating-point tensor, so convert the integer
    # degrees explicitly instead of relying on implicit type promotion.
    all_node_degs = torch.cat(node_degs).to(torch.get_default_dtype())

    print("avg_nodes: ", total_nodes / num_graphs)
    print("avg_edges: ", total_edges / num_graphs)
    print("mean node deg: ", torch.mean(all_node_degs))
    print("avg. of avg. shortest paths: ", np.mean(all_avg_shortest_paths))
    print("std. of avg. shortest paths: ", np.std(all_avg_shortest_paths))
    print("avg. diameter: ", np.mean(all_diameters))
    print("std. diameter: ", np.std(all_diameters))

In [None]:
# Load the train/val/test splits of the VOC superpixel graphs built with the
# 'edge_wt_only_coord' variant and merge them into one dataset.
voc_dataset = join_dataset_splits([
    VOCSuperpixels(
        root='../../datasets/VOCSuperpixels',
        name='edge_wt_only_coord',
        slic_compactness=10,
        split=split,
    )
    for split in ('train', 'val', 'test')
])
# Statistics for this variant are currently disabled:
# get_stats(voc_dataset)

In [None]:
# Load the train/val/test splits of the VOC superpixel graphs built with the
# 'edge_wt_region_boundary' variant, merge them, and print their statistics.
voc_dataset = join_dataset_splits([
    VOCSuperpixels(
        root='../../datasets/VOCSuperpixels',
        name='edge_wt_region_boundary',
        slic_compactness=10,
        split=split,
    )
    for split in ('train', 'val', 'test')
])
get_stats(voc_dataset)

In [None]:
# Load the train/val/test splits of the COCO superpixel graphs built with the
# 'edge_wt_only_coord' variant and merge them into one dataset.
coco_dataset = join_dataset_splits([
    COCOSuperpixels(
        root='../../datasets/COCOSuperpixels',
        name='edge_wt_only_coord',
        slic_compactness=10,
        split=split,
    )
    for split in ('train', 'val', 'test')
])
# Statistics for this dataset are currently disabled:
#get_stats(coco_dataset)