In [39]:
#loading the dataset
#importing numpy 
import numpy as np
#importing matplotlib
import pandas as pd

import matplotlib.pyplot as plt
#importing Scaling library
from sklearn.preprocessing import LabelEncoder,StandardScaler
#importing train_test_split
from sklearn.model_selection import train_test_split
#importing neural network
import tensorflow as tf
import plotly.express as px

# Loading the Dataset

In [4]:
#loading the dataset
df=pd.read_csv('/kaggle/input/africa-economic-banking-and-systemic-crisis-data/african_crises.csv')
#showing the dataset
df

Unnamed: 0,case,cc3,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,gdp_weighted_default,inflation_annual_cpi,independence,currency_crises,inflation_crises,banking_crisis
0,1,DZA,Algeria,1870,1,0.052264,0,0,0.0,3.441456,0,0,0,crisis
1,1,DZA,Algeria,1871,0,0.052798,0,0,0.0,14.149140,0,0,0,no_crisis
2,1,DZA,Algeria,1872,0,0.052274,0,0,0.0,-3.718593,0,0,0,no_crisis
3,1,DZA,Algeria,1873,0,0.051680,0,0,0.0,11.203897,0,0,0,no_crisis
4,1,DZA,Algeria,1874,0,0.051308,0,0,0.0,-3.848561,0,0,0,no_crisis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054,70,ZWE,Zimbabwe,2009,1,354.800000,1,1,0.0,-7.670000,1,1,0,crisis
1055,70,ZWE,Zimbabwe,2010,0,378.200000,1,1,0.0,3.217000,1,0,0,no_crisis
1056,70,ZWE,Zimbabwe,2011,0,361.900000,1,1,0.0,4.920000,1,0,0,no_crisis
1057,70,ZWE,Zimbabwe,2012,0,361.900000,1,1,0.0,3.720000,1,0,0,no_crisis


# Getting basic information about the dataset

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1059 entries, 0 to 1058
Data columns (total 14 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   case                             1059 non-null   int64  
 1   cc3                              1059 non-null   object 
 2   country                          1059 non-null   object 
 3   year                             1059 non-null   int64  
 4   systemic_crisis                  1059 non-null   int64  
 5   exch_usd                         1059 non-null   float64
 6   domestic_debt_in_default         1059 non-null   int64  
 7   sovereign_external_debt_default  1059 non-null   int64  
 8   gdp_weighted_default             1059 non-null   float64
 9   inflation_annual_cpi             1059 non-null   float64
 10  independence                     1059 non-null   int64  
 11  currency_crises                  1059 non-null   int64  
 12  inflation_crises    

# Checking for Missing Values

In [6]:
df.isna().sum()

case                               0
cc3                                0
country                            0
year                               0
systemic_crisis                    0
exch_usd                           0
domestic_debt_in_default           0
sovereign_external_debt_default    0
gdp_weighted_default               0
inflation_annual_cpi               0
independence                       0
currency_crises                    0
inflation_crises                   0
banking_crisis                     0
dtype: int64

# Preprocessing the Dataset

# Dropping Unnecessary Columns

In [9]:
df=df.drop(['case','country'],axis=1)

In [10]:
df

Unnamed: 0,cc3,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,gdp_weighted_default,inflation_annual_cpi,independence,currency_crises,inflation_crises,banking_crisis
0,DZA,1870,1,0.052264,0,0,0.0,3.441456,0,0,0,crisis
1,DZA,1871,0,0.052798,0,0,0.0,14.149140,0,0,0,no_crisis
2,DZA,1872,0,0.052274,0,0,0.0,-3.718593,0,0,0,no_crisis
3,DZA,1873,0,0.051680,0,0,0.0,11.203897,0,0,0,no_crisis
4,DZA,1874,0,0.051308,0,0,0.0,-3.848561,0,0,0,no_crisis
...,...,...,...,...,...,...,...,...,...,...,...,...
1054,ZWE,2009,1,354.800000,1,1,0.0,-7.670000,1,1,0,crisis
1055,ZWE,2010,0,378.200000,1,1,0.0,3.217000,1,0,0,no_crisis
1056,ZWE,2011,0,361.900000,1,1,0.0,4.920000,1,0,0,no_crisis
1057,ZWE,2012,0,361.900000,1,1,0.0,3.720000,1,0,0,no_crisis


# Onehot Encoding CC3 Column

In [15]:
dummies_cc3=pd.get_dummies(df['cc3'])
#concating with original dataset
df=pd.concat([df,dummies_cc3],axis=1)
#dropping the original cc3 column from the dataset
df=df.drop('cc3',axis=1)

In [16]:
df

Unnamed: 0,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,gdp_weighted_default,inflation_annual_cpi,independence,currency_crises,inflation_crises,...,DZA,EGY,KEN,MAR,MUS,NGA,TUN,ZAF,ZMB,ZWE
0,1870,1,0.052264,0,0,0.0,3.441456,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1871,0,0.052798,0,0,0.0,14.149140,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1872,0,0.052274,0,0,0.0,-3.718593,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1873,0,0.051680,0,0,0.0,11.203897,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,1874,0,0.051308,0,0,0.0,-3.848561,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054,2009,1,354.800000,1,1,0.0,-7.670000,1,1,0,...,0,0,0,0,0,0,0,0,0,1
1055,2010,0,378.200000,1,1,0.0,3.217000,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1056,2011,0,361.900000,1,1,0.0,4.920000,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1057,2012,0,361.900000,1,1,0.0,3.720000,1,0,0,...,0,0,0,0,0,0,0,0,0,1


# Splitting between x and y

In [17]:
y=df['banking_crisis']
x=df.drop('banking_crisis',axis=1)

# Encoding the x and y

In [19]:
label_encoder=LabelEncoder()
y=label_encoder.fit_transform(y)

In [22]:
{index:label for index,label in enumerate(label_encoder.classes_)}

{0: 'crisis', 1: 'no_crisis'}

In [26]:
y=pd.Series(y).apply(lambda x:1-x)

In [27]:
{index:label for index,label in enumerate(label_encoder.classes_)}

{0: 'crisis', 1: 'no_crisis'}

# Encoding x with StandardScaler

In [28]:
scaler=StandardScaler()
x=pd.DataFrame(scaler.fit_transform(x),columns=x.columns)

# Splitting between train and test set

In [30]:
x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.7)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(741, 36)
(318, 36)
(741,)
(318,)


# Training the Neural Network Model

In [32]:
inputs=tf.keras.Input(shape=(36,))
x=tf.keras.layers.Dense(64,activation='relu')(inputs)
x=tf.keras.layers.Dense(64,activation='relu')(x)
outputs=tf.keras.layers.Dense(1,activation='sigmoid')(x)

2023-01-15 04:38:46.099644: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


# Compiling the Model

In [33]:
model=tf.keras.Model(inputs=inputs,outputs=outputs)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 36)]              0         
_________________________________________________________________
dense (Dense)                (None, 64)                2368      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 6,593
Trainable params: 6,593
Non-trainable params: 0
_________________________________________________________________


In [34]:
y.sum()/len(y)

0.08876298394711993

In [35]:
model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=[tf.keras.metrics.AUC(name='auc')])
batch_size=64
epochs=60


In [37]:
history=model.fit(x_train,y_train,validation_split=0.2,batch_size=batch_size,epochs=epochs,
         callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
         verbose=0)

2023-01-15 04:43:52.079919: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


# Visualizing the Loss Function

# Evaluating the Model

In [43]:
model.evaluate(x_test,y_test)



[0.05006810650229454, 0.9959908127784729]

<enumerate at 0x7fedb5c04fa0>