In [2]:
import pandas as pd
import dgl
import torch
import numpy as np
import networkx as nx

Using backend: pytorch


# Data

In [146]:
# Load the per-bank table: the first CSV column is read as the index and is
# then replaced by the 'bank' column, so rows are keyed by bank name.
nodes = pd.read_csv('../nodes.csv', index_col=0).set_index('bank')
nodes.head()

Unnamed: 0_level_0,assets,liabilities,buffer,weights,original_stress,additional_stress,original_losses,additional_losses,additional_defaults
bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
b1,0.374909,9.631713,5.628295,17.119551,0.007464,0.091307,5.628295,30.256686,3
b10,22.26228,0.995829,2.548139,26.945868,0.011748,0.004283,2.548139,1.159732,0
b100,0.260467,0.056702,5.022584,8.564855,0.003734,0.000189,5.022584,0.061172,0
b101,0.148554,4.966443,9.311341,15.981748,0.006968,0.110387,9.311341,25.097576,3
b102,6.483663,0.525904,6.496722,15.501686,0.006758,0.075514,6.496722,20.851029,2


In [3]:
# Bank-to-bank matrix (row labels come from the first CSV column); it is used
# further down as a weighted adjacency matrix to build the graph.
network = pd.read_csv('../network.csv', index_col=0)
network.head()

Unnamed: 0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,...,b116,b117,b118,b119,b120,b121,b122,b123,b124,b125
b1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get target

The target is discretised into quantile classes; we will use CrossEntropyLoss since we have more than 2 classes.

In [148]:
# Column of `nodes` that is discretised into the classification target.
TARGET_COLUMN = 'additional_stress'
# Quantile levels used as class thresholds, listed from highest to lowest:
# the bucketing loop in the next cell assigns each bank to the first
# threshold it reaches, so the order matters.
QUANTILES = [0.75, 0.5, 0.25]

quant = nodes[TARGET_COLUMN].quantile(QUANTILES)
quant

0.75    0.076151
0.50    0.047829
0.25    0.014961
Name: additional_stress, dtype: float64

In [149]:
# Build one boolean column per threshold. Thresholds are visited in the order
# stored in `quant`; a bank is flagged under the first threshold it reaches,
# and `free` tracks the banks that have not been assigned to any bucket yet.
is_quant = pd.DataFrame()
free = np.ones(nodes.shape[0]).astype(bool)
# Series.items() is used instead of Series.iteritems(), which was deprecated
# in pandas 1.5 and removed in pandas 2.0.
for k,v in quant.items():
    is_quant[k] = np.logical_and(nodes[TARGET_COLUMN] >= v, free)
    free = np.logical_and(free, np.logical_not(is_quant[k]))
# Whatever is still unassigned lies below the last threshold.
is_quant[0.0] = free

is_quant

Unnamed: 0_level_0,0.75,0.50,0.25,0.00
bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
b1,True,False,False,False
b10,False,False,False,True
b100,False,False,False,True
b101,True,False,False,False
b102,False,True,False,False
...,...,...,...,...
b95,False,True,False,False
b96,False,False,True,False
b97,False,True,False,False
b98,False,False,False,True


In [150]:
# Sanity check: when every bank is flagged in exactly one bucket, the total
# number of True values equals the number of rows.
print(f"Length {is_quant.shape[0]}")
print(f"Number of True values {is_quant.sum().sum()}")

Length 125
Number of True values 125


In [151]:
# Convert the boolean bucket flags to a float array (True -> 1.0, False -> 0.0).
is_quant_np = is_quant.to_numpy().astype(float)
is_quant_np.dtype

dtype('float64')

In [152]:
# Class id of each bank = column position of its flag, following the column
# order of `is_quant` (0.75, 0.50, 0.25, 0.00).
target_np = is_quant_np.argmax(1)
target_np

array([0, 3, 3, 0, 1, 3, 3, 3, 3, 2, 1, 2, 1, 0, 2, 2, 3, 0, 2, 1, 0, 2,
       2, 1, 0, 0, 1, 2, 1, 0, 1, 2, 3, 3, 2, 0, 2, 3, 0, 1, 1, 0, 2, 0,
       0, 0, 0, 3, 2, 2, 3, 0, 0, 2, 3, 2, 2, 3, 3, 2, 1, 3, 1, 0, 0, 2,
       3, 2, 0, 1, 0, 3, 2, 1, 3, 3, 0, 2, 3, 3, 1, 2, 0, 1, 2, 3, 3, 0,
       3, 1, 1, 0, 1, 1, 3, 3, 3, 0, 0, 3, 1, 2, 3, 1, 0, 1, 2, 2, 0, 0,
       1, 1, 1, 0, 2, 1, 0, 2, 1, 1, 1, 2, 1, 3, 2], dtype=int64)

In [153]:
# NOTE: this rebinds `is_quant` from the boolean bucket table to a one-column
# integer 'label' frame (same index). Cells that expect the boolean table must
# be re-run from the bucketing cell before being executed again.
is_quant = pd.DataFrame(data=target_np, index=is_quant.index, columns=['label'])
is_quant

Unnamed: 0_level_0,label
bank,Unnamed: 1_level_1
b1,0
b10,3
b100,3
b101,0
b102,1
...,...
b95,1
b96,2
b97,1
b98,3


## Dataset

In [154]:
# Build a directed networkx graph from the adjacency DataFrame; each edge
# carries the corresponding matrix entry in its 'weight' attribute.
graph = nx.convert_matrix.from_pandas_adjacency(network, create_using=nx.DiGraph)
# Spot-check one edge.
graph.edges[('b1', 'b55')]

{'weight': 0.37490927455228}

In [155]:
# List the columns of `nodes` available for feature selection.
nodes.columns

Index(['assets', 'liabilities', 'buffer', 'weights', 'original_stress',
       'additional_stress', 'original_losses', 'additional_losses',
       'additional_defaults'],
      dtype='object')

In [156]:
# Columns of `nodes` that make up each node's input feature vector
# ('buffer' is currently disabled).
node_attr = [
    'assets',
    'liabilities',
    # 'buffer',
]
nodes_features = nodes[node_attr]
nodes_features.head()

Unnamed: 0_level_0,assets,liabilities
bank,Unnamed: 1_level_1,Unnamed: 2_level_1
b1,0.374909,9.631713
b10,22.26228,0.995829
b100,0.260467,0.056702
b101,0.148554,4.966443
b102,6.483663,0.525904


In order to group the features together we need to get a dictionary of {"bank":{"feat":values}}

In [166]:
# Preview the per-bank attribute dict for one bank: the selected feature
# columns are packed into a single float tensor under the 'feat' key.
{k:{"feat":torch.as_tensor(v, dtype=torch.float)} for k,v in nodes_features.T.to_dict('list').items()}['b10']

{'feat': tensor([22.2623,  0.9958])}

In [168]:
# Attach a 'feat' tensor to every node: transposing puts banks in the columns,
# so to_dict('list') yields {bank: [feature values in column order]}.
nx.set_node_attributes(
    graph,
    {
        bank: {"feat": torch.as_tensor(values, dtype=torch.float)}
        for bank, values in nodes_features.T.to_dict('list').items()
    },
)
# Spot-check one node.
graph.nodes(True)['b10']

{'feat': tensor([22.2623,  0.9958])}

Add target to graph

In [171]:
# Preview the {bank: {'label': class_id}} mapping for one bank.
is_quant.to_dict('index')['b10']

{'label': 3}

In [172]:
# Attach the integer class id to every node under the 'label' key.
nx.set_node_attributes(graph, is_quant.to_dict('index'))
# Spot-check one node: it should now hold both 'feat' and 'label'.
graph.nodes(True)['b10']

{'feat': tensor([22.2623,  0.9958]), 'label': 3}

### Create dgl graph

In [173]:
# Report the size of the networkx graph before converting it to DGL.
print(f"Graph with {graph.number_of_nodes()} nodes")
print(f"Graph with {graph.number_of_edges()} edges")

Graph with 125 nodes
Graph with 249 edges


In [174]:
# Convert to a DGL graph, carrying over the 'feat' and 'label' node attributes
# and the 'weight' edge attribute.
graph_dgl = dgl.from_networkx(graph,node_attrs=['feat', 'label'],edge_attrs=['weight'])
graph_dgl

Graph(num_nodes=125, num_edges=249,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'weight': Scheme(shape=(), dtype=torch.float32)})

### Dataset class

https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html

The basic DGL dataset for creating graph datasets. This class defines a basic template class for DGL Dataset. The following steps are executed automatically:

1. Check whether there is a dataset cache on disk (already processed and stored on the disk) by invoking has_cache(). If true, goto 5.
2. Call download() to download the data.
3. Call process() to process the data.
4. Call save() to save the processed dataset on disk and goto 6.
5. Call load() to load the processed dataset from disk.
6. Done.

In [176]:
class ContagionDataset(dgl.data.DGLDataset):
    """Single-graph node-classification dataset built from the notebook's ``graph_dgl``.

    The nodes are randomly partitioned into train / validation / test subsets,
    exposed as boolean node masks in ``ndata['train_mask']``,
    ``ndata['val_mask']`` and ``ndata['test_mask']``.

    Parameters
    ----------
    sets_lengths : sequence of float
        Fractions of the nodes used for the train, validation and test sets.
        Only the first two entries are read; the test set receives every node
        that is left over after the train and validation sets are filled.
    """

    def __init__(self, sets_lengths=(0.8,0.1,0.1)):
        # Must be assigned BEFORE calling the base constructor: the base
        # constructor runs the load pipeline, which calls process(), and
        # process() reads self.sets_lengths.
        self.sets_lengths = sets_lengths
        super().__init__(name='sistemic_risk', verbose=True)

    def process(self):
        """Attach the graph and create disjoint random train/val/test node masks."""
        # Graph with node and edge features. Note this is the module-level
        # object itself (not a copy), so the masks are written onto graph_dgl.
        self.graph = graph_dgl
        # __len__ / __getitem__ work on a list of graphs: here only one.
        self.graphs = [self.graph]

        n_nodes = self.graph.num_nodes()
        n_train, n_val = (int(n_nodes * k) for k in self.sets_lengths[:2])

        # Shuffle the node ids once and slice the permutation, so that the
        # three masks are mutually exclusive and together cover every node.
        idx = torch.randperm(n_nodes)
        train_mask, val_mask, test_mask = (
            torch.zeros(n_nodes, dtype=torch.bool) for _ in range(3)
        )
        train_mask[idx[:n_train]] = True
        val_mask[idx[n_train:n_train + n_val]] = True
        test_mask[idx[n_train + n_val:]] = True

        # Store the masks as node data.
        self.graph.ndata['train_mask'] = train_mask
        self.graph.ndata['val_mask'] = val_mask
        self.graph.ndata['test_mask'] = test_mask

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return len(self.graphs)

    def __getitem__(self,i):
        """Return the i-th graph of the dataset."""
        return self.graphs[i]
    