# Dataset Construction for VEN Data as a Heterogeneous Graph

In [1]:
import os
# Set before the numerical libraries are imported in the next cell.
# NOTE(review): this is the usual workaround for the "duplicate OpenMP runtime"
# abort (e.g. on macOS with conda) — confirm it is still needed here.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [2]:
import os
import os.path as osp
from itertools import product
from typing import Callable, List, Optional
import numpy as np
import scipy.sparse as sp
import torch

In [3]:
from torch_geometric.data import (
    HeteroData,
    Data,
    InMemoryDataset,
    download_url,
    extract_zip,
)

In [4]:
os.getcwd()

'/Users/nbai/surfdrive/TUD/Paper/Venice_Graph'

## Lite Dataset of VEN

In [5]:
data = HeteroData()

In [9]:
# The two node types of the heterogeneous graph.
node_types = ['vis_only', 'vis_tex']
# Load the raw feature tables relative to the current working directory and keep
# only the numeric feature columns: the first 2 (visual) / 5 (textual) columns are
# dropped before the cast to float — presumably IDs/metadata; TODO confirm against
# the raw file schema.
# NOTE(review): allow_pickle=True unpickles objects from the file; only load trusted data.
vis = np.load(osp.join(os.getcwd(), 'dataset/Venice/raw/Visual_Features.npy'),allow_pickle=True)[:,2:].astype(float)
tex = np.load(osp.join(os.getcwd(), 'dataset/Venice/raw/Textual_Features.npy'),allow_pickle=True)[:,5:].astype(float)

In [10]:
vis.shape, tex.shape

((2951, 982), (2951, 771))

In [11]:
x = np.hstack([vis,np.nan_to_num(tex)])

In [12]:
x.shape

(2951, 1753)

In [14]:
node_type_idx = np.load('dataset/Venice/raw/node_types.npy')
node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

In [15]:
node_type_idx.shape

torch.Size([2951])

In [16]:
node_type_idx

tensor([1, 1, 1,  ..., 1, 1, 1])

In [112]:
# node_type_idx encodes the type of every node: 0 -> 'vis_only', 1 -> 'vis_tex'.
data['vis_only'].num_nodes = int((node_type_idx == 0).sum())
data['vis_tex'].num_nodes = int((node_type_idx == 1).sum())
# 'all' is the merged node type that contains every node regardless of its type.
data['all'].num_nodes = len(node_type_idx)

In [113]:
data

HeteroData(
  [1mvis_only[0m={ num_nodes=1190 },
  [1mvis_tex[0m={ num_nodes=1761 },
  [1mall[0m={ num_nodes=2951 }
)

In [114]:
x[node_type_idx==0].shape

(1190, 1753)

In [164]:
# vis_only nodes carry only the visual feature block (`vis`), not the stacked `x`.
# NOTE(review): this cell was executed late (In [164]); the `data` displays produced
# before that still show x=[1190, 1753] for 'vis_only'. Re-run top to bottom to refresh them.
data['vis_only'].x = torch.from_numpy(vis[node_type_idx==0]).to(torch.float)

In [116]:
data['vis_tex'].x = torch.from_numpy(x[node_type_idx==1]).to(torch.float)

In [117]:
data['all'].x = torch.from_numpy(x).to(torch.float)

In [18]:
y_s = np.load('dataset/Venice/raw/labels.npz',allow_pickle=True)

In [19]:
att_lab = y_s['ATT_LAB'][:,1:10].astype(float)

In [24]:
att_tag = y_s['ATT_LAB'][:,-1]

In [26]:
val_tag = y_s['VAL_LAB'][:,-1]

In [29]:
att_tag.sum(), val_tag.sum(), (att_tag * val_tag).sum()

(1356, 756, 361)

In [20]:
val_lab = np.nan_to_num(y_s['VAL_LAB'][:,2:13].astype(float))

In [21]:
ys = np.hstack([att_lab, val_lab])
ys.shape

(2951, 20)

In [122]:
data['vis_only'].y = torch.from_numpy(att_lab[node_type_idx==0]).to(torch.float)

In [123]:
data['vis_tex'].y = torch.from_numpy(ys[node_type_idx==1]).to(torch.float)

In [124]:
data['all'].y = torch.from_numpy(ys).to(torch.float)

In [125]:
data.node_type = node_type_idx

In [126]:
data

HeteroData(
  node_type=[2951],
  [1mvis_only[0m={
    num_nodes=1190,
    x=[1190, 1753],
    y=[1190, 9]
  },
  [1mvis_tex[0m={
    num_nodes=1761,
    x=[1761, 1753],
    y=[1761, 20]
  },
  [1mall[0m={
    num_nodes=2951,
    x=[2951, 1753],
    y=[2951, 20]
  }
)

In [127]:
# Turn the stored train/val/test index arrays into boolean node masks, once for
# the merged 'all' node type and once per individual node type.
split = np.load('dataset/Venice/train_val_test_idx.npz')
is_vis_only = node_type_idx==0
is_vis_tex = node_type_idx==1
for name in ('train', 'val', 'test'):
    mask_key = f'{name}_mask'
    member_idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
    full_mask = torch.zeros(data['all'].num_nodes, dtype=torch.bool)
    full_mask[member_idx] = True
    data['all'][mask_key] = full_mask
    # Per-type masks are the rows of the full mask that belong to that type.
    data['vis_only'][mask_key] = full_mask[is_vis_only]
    data['vis_tex'][mask_key] = full_mask[is_vis_tex]

In [128]:
data

HeteroData(
  node_type=[2951],
  [1mvis_only[0m={
    num_nodes=1190,
    x=[1190, 1753],
    y=[1190, 9],
    train_mask=[1190],
    val_mask=[1190],
    test_mask=[1190]
  },
  [1mvis_tex[0m={
    num_nodes=1761,
    x=[1761, 1753],
    y=[1761, 20],
    train_mask=[1761],
    val_mask=[1761],
    test_mask=[1761]
  },
  [1mall[0m={
    num_nodes=2951,
    x=[2951, 1753],
    y=[2951, 20],
    train_mask=[2951],
    val_mask=[2951],
    test_mask=[2951]
  }
)

In [139]:
link_types = ['SOC', 'SPA', 'TEM', 'simp']

In [145]:
# `A_SOC` is not defined by any earlier cell, so on a fresh kernel this cell
# raised NameError. Load the social-similarity adjacency explicitly (same path
# the edge-construction cells use) before inspecting its stored values.
A_SOC = sp.load_npz('dataset/Venice/A_SOC.npz')
A_SOC.data.shape

(488103,)

In [160]:
# Register one ('all', '<LINK>_link', 'all') edge type per adjacency file.
for link in link_types:
    adj = sp.load_npz(f'dataset/Venice/A_{link}.npz').tocoo()
    if adj.nnz == 0:
        # Nothing stored for this link type: do not create an empty edge store.
        continue
    sources = torch.from_numpy(adj.row).to(torch.long)
    targets = torch.from_numpy(adj.col).to(torch.long)
    store = data['all', f'{link}_link', 'all']
    store.edge_index = torch.stack([sources, targets], dim=0)
    # The stored matrix values become the edge attribute (cast to int64).
    store.edge_attr = torch.from_numpy(adj.data).to(torch.long)

In [147]:
data

HeteroData(
  node_type=[2951],
  [1mvis_only[0m={
    num_nodes=1190,
    x=[1190, 1753],
    y=[1190, 9],
    train_mask=[1190],
    val_mask=[1190],
    test_mask=[1190]
  },
  [1mvis_tex[0m={
    num_nodes=1761,
    x=[1761, 1753],
    y=[1761, 20],
    train_mask=[1761],
    val_mask=[1761],
    test_mask=[1761]
  },
  [1mall[0m={
    num_nodes=2951,
    x=[2951, 1753],
    y=[2951, 20],
    train_mask=[2951],
    val_mask=[2951],
    test_mask=[2951]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 488103],
    edge_attr=[488103]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 445779],
    edge_attr=[445779]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 501191],
    edge_attr=[501191]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 1071977],
    edge_attr=[1071977]
  }
)

In [151]:
# Global row indices (into the full node ordering) of the nodes of each type.
all_node_ids = np.arange(len(x))
s = {
    'vis_only': all_node_ids[node_type_idx==0],
    'vis_tex': all_node_ids[node_type_idx==1],
}

In [162]:
# Read every adjacency file once up front. The original re-read each file from
# disk for every (src, dst) pair, i.e. four times per link type.
adjacency = {link: sp.load_npz(f'dataset/Venice/A_{link}.npz') for link in link_types}

for src, dst in product(node_types, node_types):
    for link in link_types:
        # Keep the rows of the source-type nodes and the columns of the
        # destination-type nodes; the resulting indices are positions within
        # each type's own node list, not global node ids.
        A_sub = adjacency[link][s[src]][:,s[dst]].tocoo()
        if A_sub.nnz>0:
            row = torch.from_numpy(A_sub.row).to(torch.long)
            col = torch.from_numpy(A_sub.col).to(torch.long)
            data[src, f'{link}_link', dst].edge_index = torch.stack([row, col], dim=0)
            data[src, f'{link}_link', dst].edge_attr = torch.from_numpy(A_sub.data).to(torch.long)

In [165]:
data

HeteroData(
  node_type=[2951],
  [1mvis_only[0m={
    num_nodes=1190,
    x=[1190, 982],
    y=[1190, 9],
    train_mask=[1190],
    val_mask=[1190],
    test_mask=[1190]
  },
  [1mvis_tex[0m={
    num_nodes=1761,
    x=[1761, 1753],
    y=[1761, 20],
    train_mask=[1761],
    val_mask=[1761],
    test_mask=[1761]
  },
  [1mall[0m={
    num_nodes=2951,
    x=[2951, 1753],
    y=[2951, 20],
    train_mask=[2951],
    val_mask=[2951],
    test_mask=[2951]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 488103],
    edge_attr=[488103]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 445779],
    edge_attr=[445779]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 501191],
    edge_attr=[501191]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 1071977],
    edge_attr=[1071977]
  },
  [1m(vis_only, SOC_link, vis_only)[0m={
    edge_index=[2, 169762],
    edge_attr=[169762]
  },
  [1m(vis_only, SPA_link, vis_only)[0m={
    edge_index=[2, 77626],
 

## Large Dataset of VEN

In [15]:
data = HeteroData()

In [16]:
# The two node types of the heterogeneous graph.
node_types = ['vis_only', 'vis_tex']
# Load the Venice-XL feature tables relative to the current working directory and
# keep only the numeric feature columns: the first 2 (visual) / 5 (textual) columns
# are dropped before the cast to float — presumably IDs/metadata; TODO confirm
# against the raw file schema.
# NOTE(review): allow_pickle=True unpickles objects from the file; only load trusted data.
vis = np.load(osp.join(os.getcwd(), 'dataset/Venice-XL/Visual_Features.npy'),allow_pickle=True)[:,2:].astype(float)
tex = np.load(osp.join(os.getcwd(), 'dataset/Venice-XL/Textual_Features.npy'),allow_pickle=True)[:,5:].astype(float)

In [17]:
vis.shape, tex.shape

((80963, 982), (80963, 771))

In [18]:
x = np.hstack([vis,np.nan_to_num(tex)])

In [19]:
x.shape

(80963, 1753)

In [20]:
node_type_idx = np.load('dataset/Venice-XL/node_types.npy')
node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

In [21]:
node_type_idx.shape

torch.Size([80963])

In [22]:
# node_type_idx encodes the type of every node: 0 -> 'vis_only', 1 -> 'vis_tex'.
data['vis_only'].num_nodes = int((node_type_idx == 0).sum())
data['vis_tex'].num_nodes = int((node_type_idx == 1).sum())
# 'all' is the merged node type that contains every node regardless of its type.
data['all'].num_nodes = len(node_type_idx)

In [23]:
data

HeteroData(
  [1mvis_only[0m={ num_nodes=31140 },
  [1mvis_tex[0m={ num_nodes=49823 },
  [1mall[0m={ num_nodes=80963 }
)

In [24]:
x[node_type_idx==0].shape

(31140, 1753)

In [25]:
data['vis_only'].x = torch.from_numpy(vis[node_type_idx==0]).to(torch.float)

In [26]:
data['vis_tex'].x = torch.from_numpy(x[node_type_idx==1]).to(torch.float)

In [27]:
data['all'].x = torch.from_numpy(x).to(torch.float)

In [28]:
y_s = np.load('dataset/Venice-XL/labels.npz',allow_pickle=True)

In [29]:
att_lab = y_s['ATT_LAB'][:,1:10].astype(float)

In [30]:
val_lab = np.nan_to_num(y_s['VAL_LAB'][:,2:13].astype(float))

In [31]:
ys = np.hstack([att_lab, val_lab])
ys.shape

(80963, 20)

In [32]:
data['vis_only'].y = torch.from_numpy(att_lab[node_type_idx==0]).to(torch.float)

In [33]:
data['vis_tex'].y = torch.from_numpy(ys[node_type_idx==1]).to(torch.float)

In [34]:
data['all'].y = torch.from_numpy(ys).to(torch.float)

In [35]:
data.node_type = node_type_idx

In [36]:
data

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=31140,
    x=[31140, 982],
    y=[31140, 9]
  },
  [1mvis_tex[0m={
    num_nodes=49823,
    x=[49823, 1753],
    y=[49823, 20]
  },
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20]
  }
)

In [37]:
# Turn the stored train/val/test index arrays into boolean node masks, once for
# the merged 'all' node type and once per individual node type.
split = np.load('dataset/Venice-XL/train_val_test_idx.npz')
is_vis_only = node_type_idx==0
is_vis_tex = node_type_idx==1
for name in ('train', 'val', 'test'):
    mask_key = f'{name}_mask'
    member_idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
    full_mask = torch.zeros(data['all'].num_nodes, dtype=torch.bool)
    full_mask[member_idx] = True
    data['all'][mask_key] = full_mask
    # Per-type masks are the rows of the full mask that belong to that type.
    data['vis_only'][mask_key] = full_mask[is_vis_only]
    data['vis_tex'][mask_key] = full_mask[is_vis_tex]

In [38]:
data

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=31140,
    x=[31140, 982],
    y=[31140, 9],
    train_mask=[31140],
    val_mask=[31140],
    test_mask=[31140]
  },
  [1mvis_tex[0m={
    num_nodes=49823,
    x=[49823, 1753],
    y=[49823, 20],
    train_mask=[49823],
    val_mask=[49823],
    test_mask=[49823]
  },
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20],
    train_mask=[80963],
    val_mask=[80963],
    test_mask=[80963]
  }
)

In [39]:
link_types = ['SOC', 'SPA', 'TEM', 'simp']

In [41]:
# Register one ('all', '<LINK>_link', 'all') edge type per adjacency file.
for link in link_types:
    adj = sp.load_npz(f'dataset/Venice-XL/A_{link}.npz').tocoo()
    if adj.nnz == 0:
        # Nothing stored for this link type: do not create an empty edge store.
        continue
    sources = torch.from_numpy(adj.row).to(torch.long)
    targets = torch.from_numpy(adj.col).to(torch.long)
    store = data['all', f'{link}_link', 'all']
    store.edge_index = torch.stack([sources, targets], dim=0)
    # The stored matrix values become the edge attribute (cast to int64).
    store.edge_attr = torch.from_numpy(adj.data).to(torch.long)

In [42]:
data

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=31140,
    x=[31140, 982],
    y=[31140, 9],
    train_mask=[31140],
    val_mask=[31140],
    test_mask=[31140]
  },
  [1mvis_tex[0m={
    num_nodes=49823,
    x=[49823, 1753],
    y=[49823, 20],
    train_mask=[49823],
    val_mask=[49823],
    test_mask=[49823]
  },
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20],
    train_mask=[80963],
    val_mask=[80963],
    test_mask=[80963]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 76422265],
    edge_attr=[76422265]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 202173159],
    edge_attr=[202173159]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 71135671],
    edge_attr=[71135671]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 290091503],
    edge_attr=[290091503]
  }
)

In [43]:
# Global row indices (into the full node ordering) of the nodes of each type.
all_node_ids = np.arange(len(x))
s = {
    'vis_only': all_node_ids[node_type_idx==0],
    'vis_tex': all_node_ids[node_type_idx==1],
}

In [45]:
# Pass 1: read each (very large) adjacency file from disk exactly once and cut
# out all four (src, dst) blocks from it. The original re-read every file for
# every (src, dst) pair, i.e. four full loads per link type.
pending = {}
for link in link_types:
    A_full = sp.load_npz(f'dataset/Venice-XL/A_{link}.npz')
    for src in node_types:
        # Row selection is shared by both destination types, so do it once.
        A_rows = A_full[s[src]]
        for dst in node_types:
            # Indices in the block are positions within each type's own node
            # list, not global node ids.
            A_sub = A_rows[:,s[dst]].tocoo()
            if A_sub.nnz>0:
                row = torch.from_numpy(A_sub.row).to(torch.long)
                col = torch.from_numpy(A_sub.col).to(torch.long)
                pending[src, link, dst] = (
                    torch.stack([row, col], dim=0),
                    torch.from_numpy(A_sub.data).to(torch.long),
                )
    # Release the matrix before the next file is loaded.
    del A_full, A_rows

# Pass 2: register the edge types in the same order as before
# ((src, dst) pairs outermost, link types innermost) so `data.edge_types`
# keeps its original ordering.
for src, dst in product(node_types, node_types):
    for link in link_types:
        if (src, link, dst) in pending:
            edge_index, edge_attr = pending.pop((src, link, dst))
            data[src, f'{link}_link', dst].edge_index = edge_index
            data[src, f'{link}_link', dst].edge_attr = edge_attr

In [46]:
data

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=31140,
    x=[31140, 982],
    y=[31140, 9],
    train_mask=[31140],
    val_mask=[31140],
    test_mask=[31140]
  },
  [1mvis_tex[0m={
    num_nodes=49823,
    x=[49823, 1753],
    y=[49823, 20],
    train_mask=[49823],
    val_mask=[49823],
    test_mask=[49823]
  },
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20],
    train_mask=[80963],
    val_mask=[80963],
    test_mask=[80963]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 76422265],
    edge_attr=[76422265]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 202173159],
    edge_attr=[202173159]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 71135671],
    edge_attr=[71135671]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 290091503],
    edge_attr=[290091503]
  },
  [1m(vis_only, SOC_link, vis_only)[0m={
    edge_index=[2, 9813406],
    edge_attr=[9813406]
  },
  [1m(vis_only, SPA_link, vis_o

## Formal Class Definition

In [2]:
class VEN(InMemoryDataset):
    r"""A subset of Flickr posts collected in Venice annotated with Heritage
    Values and Attributes, as collected in the `"Heri-Graphs: A Workflow of
    Creating Datasets for Multi-modal Machine Learning on Graphs of Heritage
    Values and Attributes with Social Media"
    <https://arxiv.org/abs/2205.07545>`_ paper.
    VEN is a heterogeneous graph containing two types of nodes - nodes with only
    visual features 'vis_only' (1,190 nodes), nodes with both visual and textual
    features 'vis_tex' (1,761 nodes) - and four types of links - social
    similarity 'SOC' (488,103 links), spatial similarity 'SPA' (445,779 links),
    temporal similarity 'TEM' (501,191 links), and simple composed link 'simp'
    (1,071,977 links).
    'vis_only' nodes are represented with 982-dimensional visual features and
    are divided into 9 heritage attribute categories
    ('architectural elements', 'form', 'gastronomy', 'interior',
    'landscape scenery and natural features', 'monuments', 'people', 'product',
    'urban scenery').
    'vis_tex' nodes are represented with 1753-dimensional visual and textual
    features and are divided into 9 heritage attribute categories plus 11
    heritage value categories ('criterion i-x', 'other').
    Both types of nodes are also merged into a single type of node 'all' with
    1753-dimensional features and 20-dimensional label categories.

    Edge types are stored as :obj:`(src, '<LINK>_link', dst)` for
    :obj:`('all', 'all')` and for every pair of the two node types; a type is
    only created when the corresponding adjacency block has stored entries.
    Every node type carries :obj:`train_mask`, :obj:`val_mask` and
    :obj:`test_mask`, and the graph-level attribute :obj:`node_type` holds the
    type index of every node (0 = 'vis_only', 1 = 'vis_tex').

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            every access. (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)

    Stats:
            * - #nodes
              - #edges
              - #features
              - #classes
            * - 2,951
              - 1,071,977
              - 1753
              - 20
    """

    url = 'https://drive.google.com/uc?export=download&id=1sxcKiZr1YGDv06wr03nsk5HVZledgzi9'

    def __init__(self, root: str, transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None):
        # The base class triggers download()/process() when files are missing.
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Files that must exist in ``raw_dir`` for the download to be skipped."""
        return [
            'A_simp.npz', 'A_SOC.npz', 'A_SPA.npz', 'A_TEM.npz', 'labels.npz',
            'node_types.npy', 'Textual_Features.npy', 'train_val_test_idx.npz',
            'Visual_Features.npy'
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the processed file inside ``processed_dir``."""
        return 'data.pt'

    def download(self):
        """Download the zip archive into ``raw_dir``, extract it and delete the archive."""
        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)
        os.remove(path)

    @staticmethod
    def _edges_from_sparse(adj):
        """Return ``(edge_index, edge_attr)`` for a SciPy sparse matrix, or
        ``None`` when the matrix has no stored entries."""
        coo = adj.tocoo()
        if coo.nnz == 0:
            return None
        row = torch.from_numpy(coo.row).to(torch.long)
        col = torch.from_numpy(coo.col).to(torch.long)
        # Stored matrix values become the edge attribute (cast to int64).
        return torch.stack([row, col], dim=0), torch.from_numpy(coo.data).to(torch.long)

    def process(self):
        """Build the heterogeneous graph from the raw files and save the
        collated result to ``processed_paths[0]``."""
        data = HeteroData()

        node_types = ['vis_only', 'vis_tex']
        link_types = ['SOC', 'SPA', 'TEM', 'simp']

        # Keep only the numeric feature columns; the first 2 (visual) / 5
        # (textual) columns are dropped — presumably IDs/metadata, TODO confirm.
        # NOTE(review): allow_pickle=True unpickles objects from the raw files.
        vis = np.load(osp.join(self.raw_dir, 'Visual_Features.npy'), allow_pickle=True)[:, 2:].astype(float)
        tex = np.load(osp.join(self.raw_dir, 'Textual_Features.npy'), allow_pickle=True)[:, 5:].astype(float)

        # Full feature matrix: visual block followed by the textual block,
        # with NaNs in the textual block replaced by zeros.
        x = np.hstack([vis, np.nan_to_num(tex)])

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

        # Boolean membership per node type, as torch tensors (for tensor
        # indexing) and as NumPy arrays (for indexing the NumPy tables, which
        # previously relied on implicit tensor -> array conversion).
        type_mask = {'vis_only': node_type_idx == 0, 'vis_tex': node_type_idx == 1}
        type_mask_np = {name: mask.numpy() for name, mask in type_mask.items()}

        data['vis_only'].num_nodes = int(type_mask['vis_only'].sum())
        data['vis_tex'].num_nodes = int(type_mask['vis_tex'].sum())
        data['all'].num_nodes = len(node_type_idx)

        # 'vis_only' nodes get the visual block only; the others the full matrix.
        data['vis_only'].x = torch.from_numpy(vis[type_mask_np['vis_only']]).to(torch.float)
        data['vis_tex'].x = torch.from_numpy(x[type_mask_np['vis_tex']]).to(torch.float)
        data['all'].x = torch.from_numpy(x).to(torch.float)

        y_s = np.load(osp.join(self.raw_dir, 'labels.npz'), allow_pickle=True)
        # 9 attribute label columns and 11 value label columns (NaN -> 0).
        att_lab = y_s['ATT_LAB'][:, 1:10].astype(float)
        val_lab = np.nan_to_num(y_s['VAL_LAB'][:, 2:13].astype(float))
        ys = np.hstack([att_lab, val_lab])

        # 'vis_only' nodes are labelled with attributes only.
        data['vis_only'].y = torch.from_numpy(att_lab[type_mask_np['vis_only']]).to(torch.float)
        data['vis_tex'].y = torch.from_numpy(ys[type_mask_np['vis_tex']]).to(torch.float)
        data['all'].y = torch.from_numpy(ys).to(torch.float)

        data.node_type = node_type_idx

        split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
        for name in ['train', 'val', 'test']:
            idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
            mask = torch.zeros(data['all'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['all'][f'{name}_mask'] = mask
            data['vis_only'][f'{name}_mask'] = mask[type_mask['vis_only']]
            data['vis_tex'][f'{name}_mask'] = mask[type_mask['vis_tex']]

        # Global row indices of the nodes of each type.
        node_ids = np.arange(len(x))
        s = {name: node_ids[type_mask_np[name]] for name in node_types}

        # Pass 1: load every adjacency file exactly once and derive the 'all'
        # edges plus all four (src, dst) blocks from it. The previous version
        # re-read each file five times.
        edges = {}
        for link in link_types:
            A = sp.load_npz(osp.join(self.raw_dir, f'A_{link}.npz'))
            edges['all', link, 'all'] = self._edges_from_sparse(A)
            for src in node_types:
                A_src = A[s[src]]  # row selection shared by both destinations
                for dst in node_types:
                    # Block indices are positions within each type's node list.
                    edges[src, link, dst] = self._edges_from_sparse(A_src[:, s[dst]])
            del A, A_src

        # Pass 2: register edge types in the original order ('all' first, then
        # (src, dst) pairs outermost and link types innermost) so that
        # `data.edge_types` is unchanged.
        pairs = [('all', 'all')] + list(product(node_types, node_types))
        for src, dst in pairs:
            for link in link_types:
                found = edges.pop((src, link, dst))
                if found is None:
                    continue
                edge_index, edge_attr = found
                data[src, f'{link}_link', dst].edge_index = edge_index
                data[src, f'{link}_link', dst].edge_attr = edge_attr

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}()'

In [4]:
class VEN_XL(InMemoryDataset):
    r"""A large subset of Flickr posts collected in Venice annotated with
    Heritage Values and Attributes, as collected in the `"Heri-Graphs: A
    Workflow of Creating Datasets for Multi-modal Machine Learning on Graphs of
    Heritage Values and Attributes with Social Media"
    <https://arxiv.org/abs/2205.07545>`_ paper.
    VEN_XL is a heterogeneous graph containing two types of nodes - nodes with
    only visual features 'vis_only' (31,140 nodes), nodes with both visual and
    textual features 'vis_tex' (49,823 nodes) - and four types of links - social
    similarity 'SOC' (76,422,265 links), spatial similarity 'SPA'
    (202,173,159 links), temporal similarity 'TEM' (71,135,671 links), and
    simple composed link 'simp' (290,091,503 links).
    'vis_only' nodes are represented with 982-dimensional visual features and
    are divided into 9 heritage attribute categories
    ('architectural elements', 'form', 'gastronomy', 'interior',
    'landscape scenery and natural features', 'monuments', 'people', 'product',
    'urban scenery').
    'vis_tex' nodes are represented with 1753-dimensional visual and textual
    features and are divided into 9 heritage attribute categories plus 11
    heritage value categories ('criterion i-x', 'other').
    Both types of nodes are also merged into a single type of node 'all' with
    1753-dimensional features and 20-dimensional label categories.

    Edge types are stored as :obj:`(src, '<LINK>_link', dst)` for
    :obj:`('all', 'all')` and for every pair of the two node types; a type is
    only created when the corresponding adjacency block has stored entries.
    Every node type carries :obj:`train_mask`, :obj:`val_mask` and
    :obj:`test_mask`, and the graph-level attribute :obj:`node_type` holds the
    type index of every node (0 = 'vis_only', 1 = 'vis_tex').

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            every access. (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)

    Stats:
            * - #nodes
              - #edges
              - #features
              - #classes
            * - 80,963
              - 290,091,503
              - 1753
              - 20
    """

    url = 'https://drive.google.com/uc?export=download&id=1QZ5tyUWs6jYjh7mJrsnpou76iy-vb0CA'

    def __init__(self, root: str, transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None):
        # The base class triggers download()/process() when files are missing.
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Files that must exist in ``raw_dir`` for the download to be skipped."""
        return [
            'A_simp.npz', 'A_SOC.npz', 'A_SPA.npz', 'A_TEM.npz', 'labels.npz',
            'node_types.npy', 'Textual_Features.npy', 'train_val_test_idx.npz',
            'Visual_Features.npy'
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the processed file inside ``processed_dir``."""
        return 'data.pt'

    def download(self):
        """Download the zip archive into ``raw_dir``, extract it and delete the archive."""
        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)
        os.remove(path)

    @staticmethod
    def _edges_from_sparse(adj):
        """Return ``(edge_index, edge_attr)`` for a SciPy sparse matrix, or
        ``None`` when the matrix has no stored entries."""
        coo = adj.tocoo()
        if coo.nnz == 0:
            return None
        row = torch.from_numpy(coo.row).to(torch.long)
        col = torch.from_numpy(coo.col).to(torch.long)
        # Stored matrix values become the edge attribute (cast to int64).
        return torch.stack([row, col], dim=0), torch.from_numpy(coo.data).to(torch.long)

    def process(self):
        """Build the heterogeneous graph from the raw files and save the
        collated result to ``processed_paths[0]``."""
        data = HeteroData()

        node_types = ['vis_only', 'vis_tex']
        link_types = ['SOC', 'SPA', 'TEM', 'simp']

        # Keep only the numeric feature columns; the first 2 (visual) / 5
        # (textual) columns are dropped — presumably IDs/metadata, TODO confirm.
        # NOTE(review): allow_pickle=True unpickles objects from the raw files.
        vis = np.load(osp.join(self.raw_dir, 'Visual_Features.npy'), allow_pickle=True)[:, 2:].astype(float)
        tex = np.load(osp.join(self.raw_dir, 'Textual_Features.npy'), allow_pickle=True)[:, 5:].astype(float)

        # Full feature matrix: visual block followed by the textual block,
        # with NaNs in the textual block replaced by zeros.
        x = np.hstack([vis, np.nan_to_num(tex)])

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

        # Boolean membership per node type, as torch tensors (for tensor
        # indexing) and as NumPy arrays (for indexing the NumPy tables, which
        # previously relied on implicit tensor -> array conversion).
        type_mask = {'vis_only': node_type_idx == 0, 'vis_tex': node_type_idx == 1}
        type_mask_np = {name: mask.numpy() for name, mask in type_mask.items()}

        data['vis_only'].num_nodes = int(type_mask['vis_only'].sum())
        data['vis_tex'].num_nodes = int(type_mask['vis_tex'].sum())
        data['all'].num_nodes = len(node_type_idx)

        # 'vis_only' nodes get the visual block only; the others the full matrix.
        data['vis_only'].x = torch.from_numpy(vis[type_mask_np['vis_only']]).to(torch.float)
        data['vis_tex'].x = torch.from_numpy(x[type_mask_np['vis_tex']]).to(torch.float)
        data['all'].x = torch.from_numpy(x).to(torch.float)

        y_s = np.load(osp.join(self.raw_dir, 'labels.npz'), allow_pickle=True)
        # 9 attribute label columns and 11 value label columns (NaN -> 0).
        att_lab = y_s['ATT_LAB'][:, 1:10].astype(float)
        val_lab = np.nan_to_num(y_s['VAL_LAB'][:, 2:13].astype(float))
        ys = np.hstack([att_lab, val_lab])

        # 'vis_only' nodes are labelled with attributes only.
        data['vis_only'].y = torch.from_numpy(att_lab[type_mask_np['vis_only']]).to(torch.float)
        data['vis_tex'].y = torch.from_numpy(ys[type_mask_np['vis_tex']]).to(torch.float)
        data['all'].y = torch.from_numpy(ys).to(torch.float)

        data.node_type = node_type_idx

        split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
        for name in ['train', 'val', 'test']:
            idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
            mask = torch.zeros(data['all'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['all'][f'{name}_mask'] = mask
            data['vis_only'][f'{name}_mask'] = mask[type_mask['vis_only']]
            data['vis_tex'][f'{name}_mask'] = mask[type_mask['vis_tex']]

        # Global row indices of the nodes of each type.
        node_ids = np.arange(len(x))
        s = {name: node_ids[type_mask_np[name]] for name in node_types}

        # Pass 1: load every (very large) adjacency file exactly once and
        # derive the 'all' edges plus all four (src, dst) blocks from it. The
        # previous version re-read each file five times.
        edges = {}
        for link in link_types:
            A = sp.load_npz(osp.join(self.raw_dir, f'A_{link}.npz'))
            edges['all', link, 'all'] = self._edges_from_sparse(A)
            for src in node_types:
                A_src = A[s[src]]  # row selection shared by both destinations
                for dst in node_types:
                    # Block indices are positions within each type's node list.
                    edges[src, link, dst] = self._edges_from_sparse(A_src[:, s[dst]])
            # Release the matrices before the next file is loaded.
            del A, A_src

        # Pass 2: register edge types in the original order ('all' first, then
        # (src, dst) pairs outermost and link types innermost) so that
        # `data.edge_types` is unchanged.
        pairs = [('all', 'all')] + list(product(node_types, node_types))
        for src, dst in pairs:
            for link in link_types:
                found = edges.pop((src, link, dst))
                if found is None:
                    continue
                edge_index, edge_attr = found
                data[src, f'{link}_link', dst].edge_index = edge_index
                data[src, f'{link}_link', dst].edge_attr = edge_attr

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}()'

In [2]:
class VEN_XL_Homo(InMemoryDataset):
    r"""Homogeneous version of the large subset of Flickr posts collected in
    Venice annotated with Heritage Values and Attributes, as collected in the
    `"Heri-Graphs: A Workflow of Creating Datasets for Multi-modal Machine
    Learning on Graphs of Heritage Values and Attributes with Social Media"
    <https://arxiv.org/abs/2205.07545>`_ paper.
    VEN_XL_Homo stores all 80,963 nodes of VEN_XL (31,140 nodes with only
    visual features and 49,823 nodes with both visual and textual features) as
    a single node set in one :obj:`torch_geometric.data.Data` object, connected
    only by the simple composed link 'simp' (290,091,503 links).
    Every node is represented with 1753-dimensional visual and textual features
    (missing textual values are filled with zeros) and a 20-dimensional label
    vector: 9 heritage attribute categories
    ('architectural elements', 'form', 'gastronomy', 'interior',
    'landscape scenery and natural features', 'monuments', 'people', 'product',
    'urban scenery') plus 11 heritage value categories
    ('criterion i-x', 'other').
    The original node type of every node is kept in :obj:`node_type`
    (0 = visual only, 1 = visual and textual), and :obj:`train_mask`,
    :obj:`val_mask` and :obj:`test_mask` mark the data split.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            every access. (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)

    Stats:
            * - #nodes
              - #edges
              - #features
              - #classes
            * - 80,963
              - 290,091,503
              - 1753
              - 20
    """

    # Same archive as VEN_XL. The previous value was the id of the small VEN
    # archive (2,951 nodes), which contradicts the XL statistics above.
    url = 'https://drive.google.com/uc?export=download&id=1QZ5tyUWs6jYjh7mJrsnpou76iy-vb0CA'

    def __init__(self, root: str, transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None):
        # The base class triggers download()/process() when files are missing.
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Files that must exist in ``raw_dir`` for the download to be skipped."""
        return [
            'A_simp.npz', 'A_SOC.npz', 'A_SPA.npz', 'A_TEM.npz', 'labels.npz',
            'node_types.npy', 'Textual_Features.npy', 'train_val_test_idx.npz',
            'Visual_Features.npy'
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the processed file inside ``processed_dir``."""
        return 'data.pt'

    def download(self):
        """Download the zip archive into ``raw_dir``, extract it and delete the archive."""
        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)
        os.remove(path)

    def process(self):
        """Build the homogeneous graph from the raw files and save the
        collated result to ``processed_paths[0]``."""
        data = Data()

        # Only the simple composed link is used for the homogeneous graph.
        link_types = ['simp']

        # Keep only the numeric feature columns; the first 2 (visual) / 5
        # (textual) columns are dropped — presumably IDs/metadata, TODO confirm.
        # NOTE(review): allow_pickle=True unpickles objects from the raw files.
        vis = np.load(osp.join(self.raw_dir, 'Visual_Features.npy'), allow_pickle=True)[:, 2:].astype(float)
        tex = np.load(osp.join(self.raw_dir, 'Textual_Features.npy'), allow_pickle=True)[:, 5:].astype(float)

        # Visual block followed by the textual block (NaN -> 0).
        x = np.hstack([vis, np.nan_to_num(tex)])

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

        data.num_nodes = len(node_type_idx)
        data.x = torch.from_numpy(x).to(torch.float)

        y_s = np.load(osp.join(self.raw_dir, 'labels.npz'), allow_pickle=True)
        # 9 attribute label columns and 11 value label columns (NaN -> 0).
        att_lab = y_s['ATT_LAB'][:, 1:10].astype(float)
        val_lab = np.nan_to_num(y_s['VAL_LAB'][:, 2:13].astype(float))
        ys = np.hstack([att_lab, val_lab])

        data.y = torch.from_numpy(ys).to(torch.float)
        data.node_type = node_type_idx

        split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
        for name in ['train', 'val', 'test']:
            idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
            mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            mask[idx] = True
            data[f'{name}_mask'] = mask

        for link in link_types:
            A_sub = sp.load_npz(osp.join(self.raw_dir, f'A_{link}.npz')).tocoo()
            # Edges are only set when the adjacency has stored entries.
            if A_sub.nnz > 0:
                row = torch.from_numpy(A_sub.row).to(torch.long)
                col = torch.from_numpy(A_sub.col).to(torch.long)
                data.edge_index = torch.stack([row, col], dim=0)
                # Stored matrix values become the edge attribute (cast to int64).
                data.edge_attr = torch.from_numpy(A_sub.data).to(torch.long)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}()'

In [3]:
class VEN_XL_Links(InMemoryDataset):
    r"""Single-node-type variant of the large Venice (VEN-XL) set of Flickr
    posts annotated with Heritage Values and Attributes, as collected in the
    `"Heri-Graphs: A Workflow of Creating Datasets for Multi-modal Machine
    Learning on Graphs of Heritage Values and Attributes with Social Media"
    <https://arxiv.org/abs/2205.07545>`_ paper.

    Every post is stored under the single node type ``'all'`` (80,963 nodes),
    and each kind of link is kept as its own relation between ``'all'``
    nodes:

    * ``('all', 'SOC_link', 'all')`` - social similarity (76,422,265 links)
    * ``('all', 'SPA_link', 'all')`` - spatial similarity (202,173,159 links)
    * ``('all', 'TEM_link', 'all')`` - temporal similarity (71,135,671 links)
    * ``('all', 'simp_link', 'all')`` - simple composed link
      (290,091,503 links)

    Nodes carry 1753-dimensional features (visual features followed by
    textual features, with missing textual values replaced by zeros) and
    20-dimensional labels: 9 heritage attribute categories
    ('architectural elements', 'form', 'gastronomy', 'interior',
    'landscape scenery and natural features', 'monuments', 'people',
    'product', 'urban scenery') followed by 11 heritage value categories
    ('criterion i-x', 'other').

    The per-node type index read from ``node_types.npy`` (31,140 nodes with
    only visual features, 49,823 nodes with visual and textual features) is
    kept as the graph-level attribute ``node_type``; no separate
    ``'vis_only'`` / ``'vis_tex'`` node types are created by this class.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            every access. (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)

    Stats:
            * - #nodes
              - #edges (simp_link)
              - #features
              - #classes
            * - 80,963
              - 290,091,503
              - 1753
              - 20
    """

    url = 'https://drive.google.com/uc?export=download&id=1QZ5tyUWs6jYjh7mJrsnpou76iy-vb0CA'

    def __init__(self, root: str, transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None):
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Names of the files expected inside the raw directory."""
        return [
            'A_simp.npz', 'A_SOC.npz', 'A_SPA.npz', 'A_TEM.npz', 'labels.npz',
            'node_types.npy', 'Textual_Features.npy', 'train_val_test_idx.npz',
            'Visual_Features.npy'
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the file that holds the processed graph."""
        return 'data.pt'

    def download(self):
        """Download the zip archive from ``url``, unpack it into the raw
        directory and delete the archive afterwards."""
        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)
        os.remove(path)

    def process(self):
        """Build the ``HeteroData`` graph from the raw files and cache it.

        Creates the ``'all'`` node type with features, labels and split masks,
        adds one ``('all', '<LINK>_link', 'all')`` relation per non-empty
        adjacency file, applies ``self.pre_transform`` when one is set, and
        saves the collated result to ``self.processed_paths[0]``.
        """
        data = HeteroData()

        link_types = ['SOC', 'SPA', 'TEM', 'simp']

        # NOTE: allow_pickle=True unpickles arbitrary objects; only use these
        # loaders on raw files obtained from a trusted source.
        # The first 2 visual / 5 textual columns are dropped before the cast
        # (presumably identifier or metadata columns -- confirm against the
        # raw file layout).
        vis = np.load(osp.join(self.raw_dir, 'Visual_Features.npy'),
                      allow_pickle=True)[:, 2:].astype(float)
        tex = np.load(osp.join(self.raw_dir, 'Textual_Features.npy'),
                      allow_pickle=True)[:, 5:].astype(float)

        # NaN textual features are replaced by zeros before concatenation.
        x = np.hstack([vis, np.nan_to_num(tex)])

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

        data['all'].num_nodes = len(node_type_idx)
        data['all'].x = torch.from_numpy(x).to(torch.float)

        # Labels: 9 attribute columns (1..9) followed by 11 value columns
        # (2..12); NaN value labels are replaced by zeros.
        y_s = np.load(osp.join(self.raw_dir, 'labels.npz'), allow_pickle=True)
        att_lab = y_s['ATT_LAB'][:, 1:10].astype(float)
        val_lab = np.nan_to_num(y_s['VAL_LAB'][:, 2:13].astype(float))
        ys = np.hstack([att_lab, val_lab])
        data['all'].y = torch.from_numpy(ys).to(torch.float)

        # Stored as a graph-level attribute, not on the 'all' node store.
        data.node_type = node_type_idx

        # Turn the stored index arrays into boolean masks over all nodes.
        split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
        for name in ['train', 'val', 'test']:
            idx = torch.from_numpy(split[f'{name}_idx']).to(torch.long)
            mask = torch.zeros(data['all'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['all'][f'{name}_mask'] = mask

        # One relation per link type; adjacency files without stored entries
        # produce no relation at all.
        for link in link_types:
            A_sub = sp.load_npz(osp.join(self.raw_dir, f'A_{link}.npz')).tocoo()
            if A_sub.nnz > 0:
                row = torch.from_numpy(A_sub.row).to(torch.long)
                col = torch.from_numpy(A_sub.col).to(torch.long)
                data['all', f'{link}_link', 'all'].edge_index = torch.stack([row, col], dim=0)
                # NOTE(review): casting to long truncates any fractional edge
                # weights -- confirm the stored matrix values are integers.
                data['all', f'{link}_link', 'all'].edge_attr = torch.from_numpy(A_sub.data).to(torch.long)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}()'

In [7]:
# Instantiate the homogeneous VEN-XL dataset rooted at 'dataset/Venice_XL_homo'
# and take its first graph.
dataset = VEN_XL_Homo('dataset/Venice_XL_homo')
data = dataset[0]

Processing...
Done!


In [4]:
# Instantiate the single-node-type VEN-XL dataset rooted at
# 'dataset/Venice_XL_links' and take its first graph.
dataset = VEN_XL_Links('dataset/Venice_XL_links')
data = dataset[0]

Processing...
Done!


In [3]:
# Root folder of the VEN dataset, relative to the working directory.
path = 'dataset/Venice'
dataset = VEN(path)
# Take the first graph of the dataset.
data = dataset[0]

In [5]:
# Display a summary of the node and edge stores of the loaded graph.
data

HeteroData(
  node_type=[80963],
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20],
    train_mask=[80963],
    val_mask=[80963],
    test_mask=[80963]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 76422265],
    edge_attr=[76422265]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 202173159],
    edge_attr=[202173159]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 71135671],
    edge_attr=[71135671]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 290091503],
    edge_attr=[290091503]
  }
)

In [7]:
# Count the 'all' nodes whose training flag is set.
data['all']['train_mask'].sum()

tensor(361)

In [7]:
# Root folder of the VEN-XL dataset, relative to the working directory.
path = 'dataset/Venice-XL'
dataset = VEN_XL(path)
# Take the first graph of the dataset.
data = dataset[0]

In [8]:
# Display a summary of the node and edge stores of the loaded graph.
data

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=31140,
    x=[31140, 982],
    y=[31140, 9],
    train_mask=[31140],
    val_mask=[31140],
    test_mask=[31140]
  },
  [1mvis_tex[0m={
    num_nodes=49823,
    x=[49823, 1753],
    y=[49823, 20],
    train_mask=[49823],
    val_mask=[49823],
    test_mask=[49823]
  },
  [1mall[0m={
    num_nodes=80963,
    x=[80963, 1753],
    y=[80963, 20],
    train_mask=[80963],
    val_mask=[80963],
    test_mask=[80963]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 76422265],
    edge_attr=[76422265]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 202173159],
    edge_attr=[202173159]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 71135671],
    edge_attr=[71135671]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 290091503],
    edge_attr=[290091503]
  },
  [1m(vis_only, SOC_link, vis_only)[0m={
    edge_index=[2, 9813406],
    edge_attr=[9813406]
  },
  [1m(vis_only, SPA_link, vis_o

## Data Loader for Sampling

In [7]:
from torch_geometric.loader import NeighborLoader

In [10]:
# List the (source type, relation, destination type) triplets of the graph.
data.edge_types

[('all', 'SOC_link', 'all'),
 ('all', 'SPA_link', 'all'),
 ('all', 'TEM_link', 'all'),
 ('all', 'simp_link', 'all'),
 ('vis_only', 'SOC_link', 'vis_only'),
 ('vis_only', 'SPA_link', 'vis_only'),
 ('vis_only', 'TEM_link', 'vis_only'),
 ('vis_only', 'simp_link', 'vis_only'),
 ('vis_only', 'SOC_link', 'vis_tex'),
 ('vis_only', 'SPA_link', 'vis_tex'),
 ('vis_only', 'TEM_link', 'vis_tex'),
 ('vis_only', 'simp_link', 'vis_tex'),
 ('vis_tex', 'SOC_link', 'vis_only'),
 ('vis_tex', 'SPA_link', 'vis_only'),
 ('vis_tex', 'TEM_link', 'vis_only'),
 ('vis_tex', 'simp_link', 'vis_only'),
 ('vis_tex', 'SOC_link', 'vis_tex'),
 ('vis_tex', 'SPA_link', 'vis_tex'),
 ('vis_tex', 'TEM_link', 'vis_tex'),
 ('vis_tex', 'simp_link', 'vis_tex')]

In [None]:
train_loader = NeighborLoader(
    data,
    # Sample 5 neighbors per edge type for 2 iterations, leaving out every
    # edge type that involves the merged 'all' node type.
    # NOTE(review): the seed nodes below are of type 'all', yet every edge
    # type touching 'all' is filtered out here, so no relation remains that
    # starts or ends at a seed node -- confirm this is intended.
    num_neighbors={key: [5] * 2 for key in data.edge_types if 'all' not in key},
    # Seed each mini-batch with 8 training nodes of type 'all'.
    batch_size=8,
    input_nodes=('all', data['all'].train_mask),
)

In [163]:
train_loader = NeighborLoader(
    data,
    # Sample 5 neighbors per edge type for 2 iterations.
    num_neighbors={key: [5] * 2 for key in data.edge_types},
    # Seed each mini-batch with 8 training nodes of type 'all'.
    batch_size=8,
    input_nodes=('all', data['all'].train_mask),
)

In [11]:
train_loader = NeighborLoader(
    data,
    # Sample 15 neighbors per edge type for 2 iterations, leaving out the
    # 'simp_link' edge types.
    num_neighbors={key: [15] * 2 for key in data.edge_types if 'simp_link' not in key},
    # Seed each mini-batch with 32 training nodes of type 'vis_tex'.
    batch_size=32,
    input_nodes=('vis_tex', data['vis_tex'].train_mask),
)

In [191]:
# Pull one mini-batch from a fresh iterator over the loader; both names
# refer to the same sampled sub-graph, which is then displayed.
batch = sampled_hetero_data = next(iter(train_loader))
batch

HeteroData(
  node_type=[2951],
  [1mvis_only[0m={
    num_nodes=1161,
    x=[1161, 982],
    y=[1161, 9],
    train_mask=[1161],
    val_mask=[1161],
    test_mask=[1161]
  },
  [1mvis_tex[0m={
    num_nodes=1755,
    x=[1755, 1753],
    y=[1755, 20],
    train_mask=[1755],
    val_mask=[1755],
    test_mask=[1755],
    batch_size=32
  },
  [1mall[0m={
    num_nodes=0,
    x=[0, 1753],
    y=[0, 20],
    train_mask=[0],
    val_mask=[0],
    test_mask=[0]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(vis_only, SOC_link, vis_only)[0m={
    edge_index=[2, 6522],
    edge_attr=[6522]
  },
  [1m(vis_only, SPA_link, vis_only)[0m={
    edge_index=[2, 6777],
    edge_attr=[6777]
  },
  [1m(vis_only, TEM_

In [192]:
# Count the sampled 'vis_tex' nodes whose training flag is set.
batch['vis_tex']['train_mask'].sum()

tensor(361)

In [195]:
# Shape of the feature matrix of the sampled 'vis_only' nodes.
batch.x_dict['vis_only'].shape

torch.Size([1161, 982])

In [11]:
train_loader = NeighborLoader(
    data,
    # Sample 15 neighbors per edge type for 2 iterations, leaving out the
    # 'simp_link' edge types.
    num_neighbors={key: [15] * 2 for key in data.edge_types if 'simp_link' not in key},
    # Seed each mini-batch with 8 training nodes of type 'vis_tex'.
    batch_size=8,
    input_nodes=('vis_tex', data['vis_tex'].train_mask),
)

In [18]:
# Pull one mini-batch from a fresh iterator over the loader; both names
# refer to the same sampled sub-graph, which is then displayed.
batch = sampled_hetero_data = next(iter(train_loader))
batch

HeteroData(
  node_type=[80963],
  [1mvis_only[0m={
    num_nodes=12351,
    x=[12351, 982],
    y=[12351, 9],
    train_mask=[12351],
    val_mask=[12351],
    test_mask=[12351]
  },
  [1mvis_tex[0m={
    num_nodes=15191,
    x=[15191, 1753],
    y=[15191, 20],
    train_mask=[15191],
    val_mask=[15191],
    test_mask=[15191],
    batch_size=8
  },
  [1mall[0m={
    num_nodes=0,
    x=[0, 1753],
    y=[0, 20],
    train_mask=[0],
    val_mask=[0],
    test_mask=[0]
  },
  [1m(all, SOC_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, SPA_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, TEM_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(all, simp_link, all)[0m={
    edge_index=[2, 0],
    edge_attr=[0]
  },
  [1m(vis_only, SOC_link, vis_only)[0m={
    edge_index=[2, 4361],
    edge_attr=[4361]
  },
  [1m(vis_only, SPA_link, vis_only)[0m={
    edge_index=[2, 4425],
    edge_attr=[4425]
  },
  [1m(vi

In [19]:
# Count the sampled 'vis_tex' nodes whose training flag is set.
batch['vis_tex']['train_mask'].sum()

tensor(3490)

In [20]:
# Shape of the feature matrix of the sampled 'vis_only' nodes.
batch.x_dict['vis_only'].shape

torch.Size([12351, 982])