In [71]:
import pandas as pd
import dgl
import torch
import numpy as np
import networkx as nx

# Data

In [72]:
# Per-bank table; the first CSV column becomes the row index.
nodes = pd.read_csv(filepath_or_buffer='nodes.csv', index_col=0)
nodes.head(5)

Unnamed: 0,bank,assets,liabilities,buffer,weights,original_stress,additional_stress,original_losses,additional_losses,additional_defaults
1,b1,0.374909,9.631713,5.628295,17.119551,0.007464,0.091307,5.628295,30.256686,3
2,b10,22.26228,0.995829,2.548139,26.945868,0.011748,0.004283,2.548139,1.159732,0
3,b100,0.260467,0.056702,5.022584,8.564855,0.003734,0.000189,5.022584,0.061172,0
4,b101,0.148554,4.966443,9.311341,15.981748,0.006968,0.110387,9.311341,25.097576,3
5,b102,6.483663,0.525904,6.496722,15.501686,0.006758,0.075514,6.496722,20.851029,2


In [73]:
# Bank-by-bank matrix (later consumed as an adjacency matrix); first CSV column is the row index.
network = pd.read_csv(filepath_or_buffer='network.csv', index_col=0)
network.head(5)

Unnamed: 0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,...,b116,b117,b118,b119,b120,b121,b122,b123,b124,b125
b1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get target

In [74]:
# Column of `nodes` used as the target, and the quantile levels (highest first)
# at which it is cut into buckets.
TARGET_COLUMN = 'additional_stress'
QUANTILES = [0.75, 0.5, 0.25]

# Series of threshold values, indexed by quantile level.
quant = nodes.loc[:, TARGET_COLUMN].quantile(q=QUANTILES)
quant

0.75    0.076151
0.50    0.047829
0.25    0.014961
Name: additional_stress, dtype: float64

In [75]:
# All-True mask with one entry per row of `nodes`.
free = np.ones(len(nodes), dtype=bool)
# Masked count (value is computed but not displayed, as only the last expression is shown).
np.logical_and(free, nodes[TARGET_COLUMN].ge(quant[0.75])).sum()
# Number of rows at or above the 0.75-quantile threshold.
nodes[TARGET_COLUMN].ge(quant[0.75]).sum()

32

In [76]:
# One-hot bucket membership: each row of `nodes` is assigned to the first
# quantile threshold (iterated in the order given by `quant`, i.e. highest
# first) that its target value reaches, or to the catch-all 0.0 bucket when it
# reaches none.
is_quant = pd.DataFrame()
# True while a row has not yet been assigned to any bucket.
free = np.ones(nodes.shape[0]).astype(bool)
# Series.items() is the supported spelling; Series.iteritems() was removed in pandas 2.0.
for k, v in quant.items():
    is_quant[k] = np.logical_and(nodes[TARGET_COLUMN] >= v, free)
    # Rows just assigned are no longer available for lower buckets.
    free = np.logical_and(free, np.logical_not(is_quant[k]))
is_quant[0.0] = free

# Every row must land in exactly one bucket.
assert (is_quant.sum(axis=1) == 1).all(), 'each node must belong to exactly one quantile bucket'

is_quant

Unnamed: 0,0.75,0.50,0.25,0.00
1,True,False,False,False
2,False,False,False,True
3,False,False,False,True
4,True,False,False,False
5,False,True,False,False
...,...,...,...,...
121,False,True,False,False
122,False,False,True,False
123,False,True,False,False
124,False,False,False,True


In [77]:
# Convert the boolean bucket table to a float array.
# np.asarray accepts both the DataFrame and an already-converted ndarray, so
# re-running this cell no longer fails with AttributeError on `.to_numpy()`.
is_quant = np.asarray(is_quant, dtype=float)
is_quant.dtype

dtype('float64')

## Dataset

In [80]:
# Directed graph from the adjacency DataFrame; each non-zero entry becomes an
# edge whose 'weight' attribute holds the matrix value.
graph = nx.from_pandas_adjacency(df=network, create_using=nx.DiGraph)
# Inspect the attribute dict of a single edge.
graph.edges['b1', 'b55']

{'weight': 0.37490927455228}

### Node features

In [21]:
# List the columns of `nodes` before choosing which ones become node features.
nodes.columns

Index(['bank', 'assets', 'liabilities', 'buffer', 'weights', 'original_stress',
       'additional_stress', 'original_losses', 'additional_losses',
       'additional_defaults'],
      dtype='object')

In [1]:
# Bank identifier plus the three attributes attached to the graph nodes below.
nodes_features = nodes.loc[:, ['bank', 'assets', 'liabilities', 'buffer']]

NameError: name 'nodes' is not defined

In [50]:
# Attach each bank's {column: value} dict to the graph node carrying that bank's name.
nx.set_node_attributes(
    G=graph,
    values=nodes_features.set_index(keys='bank').to_dict(orient='index'),
)

In [51]:
# Show the attribute dict stored on node 'b1'.
graph.nodes(data=True)['b1']

{'assets': 0.37490927455228,
 'liabilities': 9.63171274300451,
 'buffer': 5.62829471283226}

In [53]:
# Build the DGL graph in this cell so it exists before first use: without this,
# a fresh kernel run from top to bottom raises NameError on `graph_dgl`.
graph_dgl = dgl.from_networkx(graph, edge_attrs=['weight'])
# Integer node ids assigned by DGL.
graph_dgl.nodes()

tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
         56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
         98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124])

In [81]:
# Convert the NetworkX graph to DGL, carrying the 'weight' edge attribute over as edge data.
graph_dgl = dgl.from_networkx(
    graph,
    edge_attrs=['weight'],
)

In [83]:
# Look up one entry of the DGL adjacency matrix.
# NOTE(review): presumably this is the b1 -> b55 edge inspected earlier, which
# assumes DGL node ids follow the column order of network.csv — TODO confirm.
graph_dgl.adj()[0,54]

tensor(1.)

### Dataset class

In [None]:
class PDataset(dgl.data.DGLDataset):
    """Single-graph DGL dataset built from ``nodes.csv`` and ``network.csv``.

    The dataset holds exactly one directed graph:

    * edges come from the non-zero entries of the adjacency matrix in
      ``network.csv`` and carry a ``'weight'`` edge feature;
    * node features come from the ``FEATURE_COLUMNS`` of ``nodes.csv`` and are
      stored, stacked column-wise, under ``graph.ndata['feat']``.

    Labels are not attached yet (see the TODO in :meth:`process`).
    """

    # Columns of nodes.csv stacked, in this order, into the 'feat' node tensor.
    FEATURE_COLUMNS = ('assets', 'liabilities', 'buffer', 'weights')

    def __init__(self):
        super().__init__(name='sistemic_risk')

    def process(self):
        """Load both CSV files and build ``self.graph``.

        Raises:
            ValueError: if a bank present in ``network.csv`` has no row in
                ``nodes.csv`` (its features would be undefined).
        """
        # load nodes attributes
        nodes_data = pd.read_csv('nodes.csv', index_col=0)
        # load adjacency matrix
        adj_matrix = pd.read_csv('network.csv', index_col=0)

        feature_columns = list(self.FEATURE_COLUMNS)

        # Build the graph from the matrix loaded here rather than from a
        # notebook-level variable, so the dataset works on a fresh kernel.
        nx_graph = nx.from_pandas_adjacency(adj_matrix, create_using=nx.DiGraph)

        missing_banks = set(nx_graph.nodes) - set(nodes_data['bank'])
        if missing_banks:
            raise ValueError(
                f'banks missing from nodes.csv: {sorted(missing_banks)}'
            )

        # Attach features by bank name so that their alignment with the graph
        # nodes does not depend on the row order of nodes.csv.
        nx.set_node_attributes(
            nx_graph,
            nodes_data.set_index('bank')[feature_columns].to_dict('index'),
        )

        self.graph = dgl.from_networkx(
            nx_graph,
            node_attrs=feature_columns,
            edge_attrs=['weight'],
        )

        # Collapse the per-column node tensors into one (num_nodes, num_features)
        # matrix stored under 'feat'.
        self.graph.ndata['feat'] = torch.stack(
            [self.graph.ndata.pop(column) for column in feature_columns],
            dim=1,
        )

        # TODO: set labels (e.g. the quantile-bucket encoding `is_quant`
        # computed earlier in the notebook) once the target is finalised.

    def __len__(self):
        """Return the number of graphs in the dataset (always one)."""
        return 1

    def len(self):
        """Backward-compatible alias for ``len(dataset)``."""
        return len(self)

    def __getitem__(self,i):
        """Return the graph for index 0; any other index raises ``IndexError``."""
        if i != 0:
            raise IndexError('PDataset contains a single graph; valid index is 0')
        return self.graph
    