In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Shared random seed: passed as `random_state` to the train/test split and to
# the estimators below so that results are repeatable across runs.
seed = 123

# Data

In [3]:
# Per-bank attribute table. The first CSV column is read as a throw-away index
# and immediately replaced by the 'bank' column, so rows are keyed by bank id.
nodes = pd.read_csv('../nodes.csv', index_col=0).set_index('bank')
nodes.head()

Unnamed: 0_level_0,assets,liabilities,buffer,weights,original_stress,additional_stress,original_losses,additional_losses,additional_defaults
bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
b1,0.374909,9.631713,5.628295,17.119551,0.007464,0.091307,5.628295,30.256686,3
b10,22.26228,0.995829,2.548139,26.945868,0.011748,0.004283,2.548139,1.159732,0
b100,0.260467,0.056702,5.022584,8.564855,0.003734,0.000189,5.022584,0.061172,0
b101,0.148554,4.966443,9.311341,15.981748,0.006968,0.110387,9.311341,25.097576,3
b102,6.483663,0.525904,6.496722,15.501686,0.006758,0.075514,6.496722,20.851029,2


In [4]:
# Table whose rows and columns are both labelled by bank id (see the preview);
# the first CSV column supplies the row labels.
network_csv = '../network.csv'
network = pd.read_csv(network_csv, index_col=0)
network.head()

Unnamed: 0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,...,b116,b117,b118,b119,b120,b121,b122,b123,b124,b125
b1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
b5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get target

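The target has more than 2 classes (`additional_stress` is binned into four quantile classes below), so a multi-class loss such as CrossEntropyLoss applies.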

In [5]:
# Column that gets discretised into classes, and the quantile levels
# (listed highest first) whose values serve as the class thresholds.
TARGET_COLUMN = 'additional_stress'
QUANTILES = [0.75, 0.5, 0.25]

# Series indexed by quantile level, holding the threshold value for each level.
target_values = nodes[TARGET_COLUMN]
quant = target_values.quantile(QUANTILES)
quant

0.75    0.076151
0.50    0.047829
0.25    0.014961
Name: additional_stress, dtype: float64

In [6]:
# Build a one-hot membership table: one boolean column per quantile threshold
# in `quant` (iterated in its stored order) plus a final 0.0 column for every
# bank that reached none of the thresholds.
is_quant = pd.DataFrame()
# `free` marks the banks that have not been assigned to a bucket yet.
free = np.ones(nodes.shape[0]).astype(bool)
# Series.items() is used instead of Series.iteritems(): the latter was
# deprecated in pandas 1.5 and removed in pandas 2.0.
for k, v in quant.items():
    # A bank joins the first bucket whose threshold it reaches ...
    is_quant[k] = np.logical_and(nodes[TARGET_COLUMN] >= v, free)
    # ... and is then withdrawn from the pool for the remaining buckets.
    free = np.logical_and(free, np.logical_not(is_quant[k]))
# Whatever is still free falls into the lowest bucket.
is_quant[0.0] = free

is_quant

Unnamed: 0_level_0,0.75,0.50,0.25,0.00
bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
b1,True,False,False,False
b10,False,False,False,True
b100,False,False,False,True
b101,True,False,False,False
b102,False,True,False,False
...,...,...,...,...
b95,False,True,False,False
b96,False,False,True,False
b97,False,True,False,False
b98,False,False,False,True


In [7]:
# Sanity check: if every bank sits in exactly one bucket, the number of True
# cells equals the number of rows.
print(
    f"Length {is_quant.shape[0]}",
    f"Number of True values {is_quant.sum().sum()}",
    sep="\n",
)

Length 125
Number of True values 125


In [8]:
# Boolean membership table as a float array (True -> 1.0, False -> 0.0).
is_quant_np = is_quant.to_numpy(dtype=float)
is_quant_np.dtype

dtype('float64')

In [9]:
# Integer class label per bank = column position of the single 1.0 in its row.
# With the column order 0.75, 0.5, 0.25, 0.0 this makes 0 the highest bucket
# and 3 the lowest.
target_np = np.argmax(is_quant_np, axis=1)
target_np

array([0, 3, 3, 0, 1, 3, 3, 3, 3, 2, 1, 2, 1, 0, 2, 2, 3, 0, 2, 1, 0, 2,
       2, 1, 0, 0, 1, 2, 1, 0, 1, 2, 3, 3, 2, 0, 2, 3, 0, 1, 1, 0, 2, 0,
       0, 0, 0, 3, 2, 2, 3, 0, 0, 2, 3, 2, 2, 3, 3, 2, 1, 3, 1, 0, 0, 2,
       3, 2, 0, 1, 0, 3, 2, 1, 3, 3, 0, 2, 3, 3, 1, 2, 0, 1, 2, 3, 3, 0,
       3, 1, 1, 0, 1, 1, 3, 3, 3, 0, 0, 3, 1, 2, 3, 1, 0, 1, 2, 2, 0, 0,
       1, 1, 1, 0, 2, 1, 0, 2, 1, 1, 1, 2, 1, 3, 2], dtype=int64)

In [10]:
# Re-attach the bank index to the integer labels as a one-column frame.
# NOTE: this rebinds `is_quant` from the one-hot table to the label table; the
# cells above that call `is_quant.to_numpy()` / `argmax(1)` expect the one-hot
# form, so re-run the notebook from the top rather than re-running them alone.
is_quant = pd.Series(target_np, index=is_quant.index, name='label').to_frame()
is_quant

Unnamed: 0_level_0,label
bank,Unnamed: 1_level_1
b1,0
b10,3
b100,3
b101,0
b102,1
...,...
b95,1
b96,2
b97,1
b98,3


In [11]:
# Number of distinct class labels present in the target.
len(np.unique(target_np))

4

# Features

In [12]:
# Columns of `nodes` used as model inputs ('buffer' is currently disabled).
node_attr = [
    'assets',
    'liabilities',
    # 'buffer',
]
# Feature table: all banks, restricted to the selected attribute columns.
nodes_features = nodes.loc[:, node_attr]
nodes_features.head()

Unnamed: 0_level_0,assets,liabilities
bank,Unnamed: 1_level_1,Unnamed: 2_level_1
b1,0.374909,9.631713
b10,22.26228,0.995829
b100,0.260467,0.056702
b101,0.148554,4.966443
b102,6.483663,0.525904


# Separate train and test

In [13]:
# Hold out part of the banks for evaluation. The split sizes are left at
# scikit-learn's defaults and the shuffle is pinned by `seed`.
features_np = nodes_features.to_numpy()
x_train, x_test, y_train, y_test = train_test_split(
    features_np,
    target_np,
    random_state=seed,
)

# Logistic regression

In [14]:
# Fit a logistic regression on the training split and show the per-class
# coefficient matrix (one row per class, one column per feature).
# `multi_class='multinomial'` is no longer passed: the parameter has been
# deprecated since scikit-learn 1.5 (it emits a FutureWarning and is scheduled
# for removal), and the default ('auto' with the default lbfgs solver) already
# fits a multinomial model when there are more than two classes.
model_lr = LogisticRegression(random_state=seed).fit(x_train, y_train)
model_lr.coef_

array([[-0.03013861,  1.01538984],
       [-0.00532458,  0.86129399],
       [ 0.03375411, -0.98538734],
       [ 0.00170908, -0.89129649]])

In [15]:
# Per-class precision / recall / F1 of the logistic regression on the test split.
lr_test_pred = model_lr.predict(x_test)
print(classification_report(y_true=y_test, y_pred=lr_test_pred))

              precision    recall  f1-score   support

           0       0.83      0.50      0.62        10
           1       0.00      0.00      0.00         9
           2       0.00      0.00      0.00         8
           3       0.25      1.00      0.40         5

    accuracy                           0.31        32
   macro avg       0.27      0.38      0.26        32
weighted avg       0.30      0.31      0.26        32



In [16]:
# Compare fit on seen vs. unseen banks; a large gap points to overfitting.
print(
    f"Train accuracy: {model_lr.score(x_train, y_train)}",
    f"Test accuracy: {model_lr.score(x_test, y_test)}",
    sep="\n",
)

Train accuracy: 0.5161290322580645
Test accuracy: 0.3125


# Random forest

In [48]:
# Random forest with 25 trees, seeded for repeatability, fitted on the
# training split.
model_rf = RandomForestClassifier(n_estimators=25, random_state=seed)
model_rf.fit(x_train, y_train)
model_rf

RandomForestClassifier(n_estimators=25, random_state=123)

In [49]:
# Per-class precision / recall / F1 of the random forest on the test split.
rf_test_pred = model_rf.predict(x_test)
print(classification_report(y_true=y_test, y_pred=rf_test_pred))

              precision    recall  f1-score   support

           0       0.83      0.50      0.62        10
           1       0.43      0.33      0.38         9
           2       0.55      0.75      0.63         8
           3       0.38      0.60      0.46         5

    accuracy                           0.53        32
   macro avg       0.55      0.55      0.52        32
weighted avg       0.58      0.53      0.53        32



In [50]:
# Compare fit on seen vs. unseen banks; a large gap points to overfitting.
print(
    f"Train accuracy: {model_rf.score(x_train, y_train)}",
    f"Test accuracy: {model_rf.score(x_test, y_test)}",
    sep="\n",
)

Train accuracy: 1.0
Test accuracy: 0.53125


# KNN

In [51]:
# k-nearest-neighbours classifier; k is set to the number of distinct class
# labels found in `target_np`.
n_neighbors = len(np.unique(target_np))
model_knn = KNeighborsClassifier(n_neighbors=n_neighbors)
model_knn.fit(x_train, y_train)
model_knn

KNeighborsClassifier(n_neighbors=4)

In [52]:
# Per-class precision / recall / F1 of the KNN classifier on the test split.
knn_test_pred = model_knn.predict(x_test)
print(classification_report(y_true=y_test, y_pred=knn_test_pred))

              precision    recall  f1-score   support

           0       0.75      0.60      0.67        10
           1       0.20      0.11      0.14         9
           2       0.42      0.62      0.50         8
           3       0.29      0.40      0.33         5

    accuracy                           0.44        32
   macro avg       0.41      0.43      0.41        32
weighted avg       0.44      0.44      0.43        32



In [53]:
# Compare fit on seen vs. unseen banks; a large gap points to overfitting.
print(
    f"Train accuracy: {model_knn.score(x_train, y_train)}",
    f"Test accuracy: {model_knn.score(x_test, y_test)}",
    sep="\n",
)

Train accuracy: 0.6236559139784946
Test accuracy: 0.4375
