In [1]:
# Widen the notebook container to 90% of the browser window so that wide
# outputs stay readable. `display` and `HTML` are imported from the public
# IPython.display module; IPython.core.display is a deprecated location for them.
from IPython.display import HTML, display
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import copy

import os
import sys
import torch
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from collections import defaultdict
from tqdm import tqdm_notebook as tqdm

from utils import filter_out_isolate, draw_cluster_info, draw_isolate_cluster_info, draw_trainer_info, print_data_info


In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def print_data_info(data):
    """Print a short summary of a single graph data object.

    NOTE: this definition shadows the ``print_data_info`` imported from
    ``utils`` at the top of the notebook.

    Args:
        data: an object exposing ``num_nodes``, ``num_edges``,
            ``num_node_features``, ``num_edge_features`` and ``keys``
            (e.g. one element of a torch_geometric dataset).

    Returns:
        None. The summary is written to stdout.
    """
    # `keys` may be a plain attribute/property or a method, depending on the
    # object; call it when needed so the attribute names are printed rather
    # than a bound-method repr.
    keys = data.keys
    if callable(keys):
        keys = keys()

    print('Info (attributes) of a single data instance')
    print(data,
          '\n number of nodes: ', data.num_nodes,
          '\n number of edges: ', data.num_edges,
          '\n number of features per node: ', data.num_node_features,
          '\n number of edge features: ', data.num_edge_features,
          '\n all the attributes of data: ', keys)

In [4]:
# Root directory under which every dataset in this notebook is downloaded and
# cached. Set the PYG_DATA_ROOT environment variable to override it; the
# default is the original machine-specific location.
# os.path.join(..., '') guarantees a trailing separator, which the cells below
# rely on because they build paths by plain string concatenation.
local_data_root = os.path.join(
    os.environ.get('PYG_DATA_ROOT', '/media/xiangli/storage1/projects/tmpdata/'),
    '',
)

## Belong to the Planetoid series

### Cora dataset

In [5]:
from torch_geometric.datasets import Planetoid

# Cora is cached under <local_data_root>/Planetoid/Cora.
data_name = 'Cora'
dataset = Planetoid(
    root=os.path.join(local_data_root, 'Planetoid', data_name),
    name=data_name,
)
data = dataset[0]  # first element of the dataset
print_data_info(data)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!
Info (attributes) of a single data instance
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708]) 
 number of nodes:  2708 
 number of edges:  10556 
 number of features per ndoe:  1433 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y', 'train_ma

### Citeseer Dataset

In [6]:
from torch_geometric.datasets import Planetoid

# CiteSeer is cached under <local_data_root>/Planetoid/CiteSeer.
data_name = 'CiteSeer'
dataset = Planetoid(
    root=os.path.join(local_data_root, 'Planetoid', data_name),
    name=data_name,
)
data = dataset[0]  # first element of the dataset
print_data_info(data)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!
Info (attributes) of a single data instance
Data(edge_index=[2, 9104], test_mask=[3327], train_mask=[3327], val_mask=[3327], x=[3327, 3703], y=[3327]) 
 number of nodes:  3327 
 number of edges:  9104 
 number of features per ndoe:  3703 
 number of edge features:  0 
 all the attributes of data:  ['x'

### PubMed Dataset

In [7]:
from torch_geometric.datasets import Planetoid

# PubMed is cached under <local_data_root>/Planetoid/PubMed.
data_name = 'PubMed'
dataset = Planetoid(
    root=os.path.join(local_data_root, 'Planetoid', data_name),
    name=data_name,
)
data = dataset[0]  # first element of the dataset
print_data_info(data)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!
Info (attributes) of a single data instance
Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717]) 
 number of nodes:  19717 
 number of edges:  88648 
 number of features per ndoe:  500 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_in

### SNAP datasets (Stanford Network Analysis Project)

In [6]:
from torch_geometric.datasets import SNAPDataset

# Dataset names noted for SNAPDataset, with the archive each one maps to:
#   'ego-facebook'      -> facebook.tar.gz
#   'ego-gplus'         -> gplus.tar.gz                      (data format failure)
#   'ego-twitter'       -> twitter.tar.gz                    (too large for processing)
#   'soc-epinions1'     -> soc-Epinions1.txt.gz
#   'soc-livejournal1'  -> soc-LiveJournal1.txt.gz
#   'soc-pokec'         -> soc-pokec-relationships.txt.gz
#   'soc-slashdot0811'  -> soc-Slashdot0811.txt.gz
#   'soc-slashdot0922'  -> soc-Slashdot0902.txt.gz
#   'wiki-vote'         -> wiki-Vote.txt.gz
data_name = 'ego-facebook'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first graph of the collection
print_data_info(data)

number of data:  10
Info (attributes) of a single data instance
Data(circle=[325], circle_batch=[325], edge_index=[2, 5732], x=[348, 1406]) 
 number of nodes:  348 
 number of edges:  5732 
 number of features per ndoe:  1406 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'circle', 'circle_batch']


In [None]:
# SNAP 'ego-twitter'. Relies on SNAPDataset imported in the 'ego-facebook' cell.
# NOTE(review): the dataset list in that cell marks this one as
# "too large for processing" - expect a very long run.
data_name = 'ego-twitter'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Processing...


In [7]:
# SNAP 'wiki-vote'. Relies on SNAPDataset imported in the 'ego-facebook' cell.
data_name = 'wiki-vote'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 103689]) 
 number of nodes:  7115 
 number of edges:  103689 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index']


In [8]:
# SNAP 'soc-pokec'. Relies on SNAPDataset imported in the 'ego-facebook' cell.
data_name = 'soc-pokec'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 30622560]) 
 number of nodes:  1632804 
 number of edges:  30622560 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index']


In [9]:
# SNAP 'soc-livejournal1'. Relies on SNAPDataset imported in the 'ego-facebook' cell.
data_name = 'soc-livejournal1'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
/media/xiangli/storage1/projects/tmpdata/soc-livejournal1/soc-livejournal1/raw/soc-LiveJournal1.txt.gz
Extracting /media/xiangli/storage1/projects/tmpdata/soc-livejournal1/soc-livejournal1/raw/soc-LiveJournal1.txt.gz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 68993773]) 
 number of nodes:  4847571 
 number of edges:  68993773 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index']


In [10]:
# SNAP 'soc-slashdot0811'. Relies on SNAPDataset imported in the 'ego-facebook' cell.
data_name = 'soc-slashdot0811'
dataset = SNAPDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)


Downloading https://snap.stanford.edu/data/soc-Slashdot0811.txt.gz
/media/xiangli/storage1/projects/tmpdata/soc-slashdot0811/soc-slashdot0811/raw/soc-Slashdot0811.txt.gz
Extracting /media/xiangli/storage1/projects/tmpdata/soc-slashdot0811/soc-slashdot0811/raw/soc-Slashdot0811.txt.gz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 905468]) 
 number of nodes:  77360 
 number of edges:  905468 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index']


### TUD dataset

In [14]:
from torch_geometric.datasets import TUDataset

# TU Dortmund collection: FIRSTMM_DB.
data_name = 'FIRSTMM_DB'
dataset = TUDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first graph
print_data_info(data)


Downloading http://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/FIRSTMM_DB.zip
Extracting /media/xiangli/storage1/projects/tmpdata/FIRSTMM_DB/FIRSTMM_DB/FIRSTMM_DB.zip
Processing...
Done!
number of data:  41
Info (attributes) of a single data instance
Data(edge_attr=[6420, 0], edge_index=[2, 6420], x=[1432, 5], y=[1]) 
 number of nodes:  1432 
 number of edges:  6420 
 number of features per ndoe:  5 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'edge_attr', 'y']


In [32]:
from torch_geometric.datasets import TUDataset

# TU Dortmund collection: REDDIT-MULTI-12K.
data_name = 'REDDIT-MULTI-12K'
dataset = TUDataset(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first graph
print_data_info(data)


Downloading http://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/REDDIT-MULTI-12K.zip
Extracting /media/xiangli/storage1/projects/tmpdata/REDDIT-MULTI-12K/REDDIT-MULTI-12K/REDDIT-MULTI-12K.zip
Processing...
Done!
number of data:  11929
Info (attributes) of a single data instance
Data(edge_index=[2, 118], y=[1]) 
 number of nodes:  59 
 number of edges:  118 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index', 'y']


## Belong to the CitationFull series

In [16]:
# Names noted as accepted by CitationFull:
#   'cora', 'cora_ml', 'citeseer', 'dblp', 'pubmed'
from torch_geometric.datasets import CitationFull

data_name = 'cora_ml'
dataset = CitationFull(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora_ml.npz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 16316], x=[2995, 2879], y=[2995]) 
 number of nodes:  2995 
 number of edges:  16316 
 number of features per ndoe:  2879 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


In [17]:
# CitationFull 'citeseer'. Relies on CitationFull imported in the 'cora_ml' cell.
data_name = 'citeseer'
dataset = CitationFull(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/citeseer.npz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 10674], x=[4230, 602], y=[4230]) 
 number of nodes:  4230 
 number of edges:  10674 
 number of features per ndoe:  602 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


In [19]:
# CitationFull 'dblp'. Relies on CitationFull imported in the 'cora_ml' cell.
data_name = 'dblp'
dataset = CitationFull(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/dblp.npz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 105734], x=[17716, 1639], y=[17716]) 
 number of nodes:  17716 
 number of edges:  105734 
 number of features per ndoe:  1639 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


In [20]:
# CitationFull 'pubmed'. Relies on CitationFull imported in the 'cora_ml' cell.
data_name = 'pubmed'
dataset = CitationFull(
    root=os.path.join(local_data_root, data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/pubmed.npz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 88648], x=[19717, 500], y=[19717]) 
 number of nodes:  19717 
 number of edges:  88648 
 number of features per ndoe:  500 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


### CoraFull dataset (alias for CitationFull with name "Cora")

In [18]:
from torch_geometric.datasets import CoraFull

data_name = 'CoraFull'
# Build the cache directory from data_name. It was previously spelled
# 'CoralFull' and data_name was left unused.
# NOTE: data cached under the old misspelled directory is not picked up, so
# the dataset is fetched again into <local_data_root>/CoraFull.
dataset = CoraFull(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz
Processing...
Done!
number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 126842], x=[19793, 8710], y=[19793]) 
 number of nodes:  19793 
 number of edges:  126842 
 number of features per ndoe:  8710 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


### Coauthor dataset

In [21]:
from torch_geometric.datasets import Coauthor

# Coauthor 'cs' variant, cached under <local_data_root>/Coauthor/cs.
data_name = 'cs'
dataset = Coauthor(
    root=os.path.join(local_data_root, 'Coauthor', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 163788], x=[18333, 6805], y=[18333]) 
 number of nodes:  18333 
 number of edges:  163788 
 number of features per ndoe:  6805 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


### Amazon dataset

In [16]:
from torch_geometric.datasets import Amazon

# Amazon 'computers' variant; the 'photo' variant is loaded in the following cell.
data_name = 'computers'
dataset = Amazon(
    root=os.path.join(local_data_root, 'Amazon', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 491722], x=[13752, 767], y=[13752]) 
 number of nodes:  13752 
 number of edges:  491722 
 number of features per ndoe:  767 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


In [17]:
# Amazon 'photo' variant. Relies on Amazon imported in the 'computers' cell.
data_name = 'photo'
dataset = Amazon(
    root=os.path.join(local_data_root, 'Amazon', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 238162], x=[7650, 745], y=[7650]) 
 number of nodes:  7650 
 number of edges:  238162 
 number of features per ndoe:  745 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


### PPI dataset

In [19]:
from torch_geometric.datasets import PPI

# PPI with no explicit split argument, cached under <local_data_root>/PPI/PPI.
data_name = 'PPI'
dataset = PPI(root=os.path.join(local_data_root, 'PPI', data_name))
print('number of data: ', len(dataset))

# Summarise every graph in the dataset, one after another.
for i in range(len(dataset)):
    print('Infor for the data #[{}]'.format(i))
    data = dataset[i]
    print_data_info(data)

number of data:  20
Infor for the data #[0]
Info (attributes) of a single data instance
Data(edge_index=[2, 32318], x=[1767, 50], y=[1767, 121]) 
 number of nodes:  1767 
 number of edges:  32318 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']
Infor for the data #[1]
Info (attributes) of a single data instance
Data(edge_index=[2, 29704], x=[1377, 50], y=[1377, 121]) 
 number of nodes:  1377 
 number of edges:  29704 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']
Infor for the data #[2]
Info (attributes) of a single data instance
Data(edge_index=[2, 59644], x=[2263, 50], y=[2263, 121]) 
 number of nodes:  2263 
 number of edges:  59644 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']
Infor for the data #[3]
Info (attributes) of a single data instance
Data(edge_index=[2, 6

In [18]:
from torch_geometric.datasets import PPI

# PPI test split, sharing the cache directory <local_data_root>/PPI/PPI.
data_name = 'PPI'
dataset = PPI(
    root=os.path.join(local_data_root, 'PPI', data_name),
    split="test",
)
print('number of data: ', len(dataset))

# Summarise every graph in the test split.
for i in range(len(dataset)):
    print('Infor for the data #[{}]'.format(i))
    data = dataset[i]
    print_data_info(data)

number of data:  2
Infor for the data #[0]
Info (attributes) of a single data instance
Data(edge_index=[2, 100648], x=[3224, 50], y=[3224, 121]) 
 number of nodes:  3224 
 number of edges:  100648 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']
Infor for the data #[1]
Info (attributes) of a single data instance
Data(edge_index=[2, 61328], x=[2300, 50], y=[2300, 121]) 
 number of nodes:  2300 
 number of edges:  61328 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


In [8]:
from torch_geometric.datasets import PPI

# PPI validation split; only the first graph is summarised here.
data_name = 'PPI'
dataset = PPI(
    root=os.path.join(local_data_root, 'PPI', data_name),
    split="val",
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  2
Info (attributes) of a single data instance
Data(edge_index=[2, 97446], x=[3230, 50], y=[3230, 121]) 
 number of nodes:  3230 
 number of edges:  97446 
 number of features per ndoe:  50 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y']


### Reddit dataset

In [20]:
from torch_geometric.datasets import Reddit

data_name = 'Reddit'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = Reddit(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 114615892], test_mask=[232965], train_mask=[232965], val_mask=[232965], x=[232965, 602], y=[232965]) 
 number of nodes:  232965 
 number of edges:  114615892 
 number of features per ndoe:  602 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y', 'train_mask', 'val_mask', 'test_mask']


In [None]:
from torch_geometric.datasets import Flickr

### QM7b dataset

In [21]:
from torch_geometric.datasets import QM7b

data_name = 'QM7b'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = QM7b(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first sample
print_data_info(data)

number of data:  7211
Info (attributes) of a single data instance
Data(edge_attr=[25], edge_index=[2, 25], y=[1, 14]) 
 number of nodes:  5 
 number of edges:  25 
 number of features per ndoe:  0 
 number of edge features:  1 
 all the attributes of data:  ['edge_index', 'edge_attr', 'y']


### QM9 dataset

In [9]:
from torch_geometric.datasets import QM9

data_name = 'QM9'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = QM9(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first sample
print_data_info(data)

number of data:  133246
Info (attributes) of a single data instance
Data(edge_attr=[8, 4], edge_index=[2, 8], pos=[5, 3], x=[5, 13], y=[1, 12]) 
 number of nodes:  5 
 number of edges:  8 
 number of features per ndoe:  13 
 number of edge features:  4 
 all the attributes of data:  ['x', 'edge_index', 'edge_attr', 'y', 'pos']


### Entities

In [23]:
from torch_geometric.datasets import Entities

# Entities 'AIFB', cached under <local_data_root>/Entities/AIFB.
data_name = 'AIFB'
dataset = Entities(
    root=os.path.join(local_data_root, 'Entities', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 58086], edge_norm=[58086], edge_type=[58086], test_idx=[36], test_y=[36], train_idx=[140], train_y=[140]) 
 number of nodes:  8285 
 number of edges:  58086 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index', 'edge_type', 'edge_norm', 'train_idx', 'train_y', 'test_idx', 'test_y']


#### Large data scale :  Entities  AM

In [24]:
# Entities 'MUTAG'. Relies on Entities imported in the 'AIFB' cell.
data_name = 'MUTAG'
dataset = Entities(
    root=os.path.join(local_data_root, 'Entities', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 148454], edge_norm=[148454], edge_type=[148454], test_idx=[68], test_y=[68], train_idx=[272], train_y=[272]) 
 number of nodes:  23644 
 number of edges:  148454 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index', 'edge_type', 'edge_norm', 'train_idx', 'train_y', 'test_idx', 'test_y']


##### No node features but with labels

In [26]:
# Entities 'BGS'. Relies on Entities imported in the 'AIFB' cell.
data_name = 'BGS'
dataset = Entities(
    root=os.path.join(local_data_root, 'Entities', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
print_data_info(data)

number of data:  1
Info (attributes) of a single data instance
Data(edge_index=[2, 1832398], edge_norm=[1832398], edge_type=[1832398], test_idx=[29], test_y=[29], train_idx=[117], train_y=[117]) 
 number of nodes:  333845 
 number of edges:  1832398 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index', 'edge_type', 'edge_norm', 'train_idx', 'train_y', 'test_idx', 'test_y']


### GEDDataset

In [27]:
from torch_geometric.datasets import GEDDataset

# GEDDataset 'LINUX', cached under <local_data_root>/GEDDataset/LINUX.
data_name = 'LINUX'
dataset = GEDDataset(
    root=os.path.join(local_data_root, 'GEDDataset', data_name),
    name=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
# Use the shared helper instead of repeating its print statements inline, so
# the summary format is defined in a single place.
print_data_info(data)

number of data:  800
Info (attributes) of a single data instance
Data(edge_index=[2, 18], i=[1]) 
 number of nodes:  8 
 number of edges:  18 
 number of features per ndoe:  0 
 number of edge features:  0 
 all the attributes of data:  ['edge_index', 'i']


### MNISTSuperpixels

In [28]:
from torch_geometric.datasets import MNISTSuperpixels

data_name = 'MNISTSuperpixels'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = MNISTSuperpixels(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first sample
print_data_info(data)

number of data:  60000
Info (attributes) of a single data instance
Data(edge_index=[2, 1399], pos=[75, 2], x=[75, 1], y=[1]) 
 number of nodes:  75 
 number of edges:  1399 
 number of features per ndoe:  1 
 number of edge features:  0 
 all the attributes of data:  ['x', 'edge_index', 'y', 'pos']


### ShapeNet

In [29]:
from torch_geometric.datasets import ShapeNet

data_name = 'ShapeNet'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = ShapeNet(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first sample
print_data_info(data)

Downloading https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip
Extracting /media/xiangli/storage1/projects/tmpdata/ShapeNet/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip
Processing...
Done!
number of data:  14007
Info (attributes) of a single data instance
Data(category=[1], pos=[2252, 3], x=[2252, 3], y=[2252]) 
 number of nodes:  2252 
 number of edges:  None 
 number of features per ndoe:  3 
 number of edge features:  0 
 all the attributes of data:  ['x', 'y', 'pos', 'category']


### PCPNetDataset

In [30]:
from torch_geometric.datasets import PCPNetDataset

# data_name doubles as the PCPNet category, so the cache directory and the
# `category` argument cannot drift apart.
data_name = 'Noisy'
dataset = PCPNetDataset(
    root=os.path.join(local_data_root, 'PCPNetDataset', data_name),
    category=data_name,
)
print('number of data: ', len(dataset))
data = dataset[0]
# Use the shared helper instead of repeating its print statements inline, so
# the summary format is defined in a single place.
print_data_info(data)

number of data:  32
Info (attributes) of a single data instance
Data(pos=[100000, 3], test_idx=[5000], x=[100000, 5]) 
 number of nodes:  100000 
 number of edges:  None 
 number of features per ndoe:  5 
 number of edge features:  0 
 all the attributes of data:  ['x', 'pos', 'test_idx']


### S3DIS

Every sample in this dataset contains 4096 nodes, and the dataset holds 20291 samples in total.

Each sample of this dataset can be used directly as a mini-batch.

In [31]:
from torch_geometric.datasets import S3DIS

data_name = 'S3DIS'
# os.path.join avoids the doubled '/' that `local_data_root + '/' + data_name`
# produced (local_data_root already ends with a separator).
dataset = S3DIS(root=os.path.join(local_data_root, data_name))
print('number of data: ', len(dataset))
data = dataset[0]  # inspect only the first sample
print_data_info(data)

number of data:  20291
Info (attributes) of a single data instance
Data(pos=[4096, 3], x=[4096, 6], y=[4096]) 
 number of nodes:  4096 
 number of edges:  None 
 number of features per ndoe:  6 
 number of edge features:  0 
 all the attributes of data:  ['x', 'y', 'pos']


In [None]:
# Collect (index, node count) for every sample of the currently loaded
# `dataset` that has more than 5000 nodes. The comprehension variable is
# named `sample` so it is not mistaken for the notebook-level `data`.
select = [
    (idx, sample.num_nodes)
    for idx, sample in enumerate(dataset)
    if sample.num_nodes > 5000
]
print(select)
    

In [None]:
# free GPU memory
!(nvidia-smi | grep 'python' | awk '{ print $3 }' | xargs -n1 kill -9)