In [13]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [14]:
#Read data
df_mx = pd.read_csv("Sample Data/MX.csv", index_col=0)
df_thai = pd.read_csv("Sample Data/THAI.csv", index_col=0)
df_sgp = pd.read_csv("Sample Data/SGP.csv", index_col=0)

df_agg = pd.concat([df_thai, df_mx, df_sgp])
df_test = pd.concat([df_sgp])
df_agg.head()

Unnamed: 0_level_0,Short-term debt (% of total external debt),Real interest rate (%),Total reserves minus gold (current US$),Trade (% of GDP),Imports of goods and services (current US$),Exports of goods and services (current US$),Current account balance (% of GDP),"Portfolio investment, net (BoP, current US$)","Foreign direct investment, net inflows (% of GDP)",Real GDP (current US$),...,Left Government (democ),Polity (polity),Durable (durable),Turnover (xconst),Legislative/Executive election (exrec),State fragility index (sfi),Years (poleff),Economic Effectiveness (ecoeff),Presist (presistance),Target
"Total debt service (% of exports of goods, services and primary income)",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.47822,0.417122,0.395807,0.000143,0.0,0.0,0.0,0.156366,0.279665,0.212532,1.4e-05,...,0.4,0.6,1,3,8,0.319919,3.0,0.5,3,0
0.488071,0.418222,0.425714,0.0,0.003768,4.7e-05,0.00035,0.147127,0.279731,0.202131,0.0,...,0.0,0.15,0,1,4,0.319711,3.0,0.5,3,0
0.507164,0.417371,0.452184,0.000707,0.028019,0.000663,0.001292,0.158745,0.279695,0.244266,0.000427,...,0.0,0.15,1,1,4,0.319463,3.0,0.5,2,0
0.529773,0.418589,0.497835,0.001689,0.040052,0.002715,0.002906,0.150165,0.279506,0.227943,0.002647,...,0.0,0.0,0,0,0,0.319813,3.0,0.5,2,0
0.568958,0.417134,0.531526,0.003913,0.105083,0.006484,0.005783,0.151565,0.279147,0.316532,0.006558,...,0.4,0.65,0,3,8,0.320109,3.0,0.5,1,0


In [15]:
X = df_agg.drop(columns=['Target'])
y = df_agg['Target']

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=1)
clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
clf.predict_proba(X_test)

clf.predict(X_test)

clf.score(X_test, y_test)

0.8717948717948718

In [17]:
#Tune MLP
import warnings
warnings.filterwarnings("ignore")

In [18]:
from sklearn.model_selection import ShuffleSplit

cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=1)

param_grid = {
    'hidden_layer_sizes':[5],
    'activation': ['identity','logistic','tanh','relu'],
    'solver': ['lbfgs','sgd','adam'],
    'learning_rate':['constant','invscaling','adaptive']}

gridSearch = GridSearchCV(MLPClassifier(), param_grid, cv=cv,
                          scoring=['recall','f1'],refit='f1',verbose=2)
gridSearch.fit(X_train, y_train.replace({'yes':1,'no':0}))
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

Fitting 1 folds for each of 36 candidates, totalling 36 fits
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=constant, solver=lbfgs; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=constant, solver=sgd; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=constant, solver=adam; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=invscaling, solver=lbfgs; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=invscaling, solver=sgd; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=invscaling, solver=adam; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=adaptive, solver=lbfgs; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=adaptive, solver=sgd; total time=   0.0s
[CV] END activation=identity, hidden_layer_sizes=5, learning_rate=ada

In [19]:
param_grid = {
    'hidden_layer_sizes':[5,10,15,(5,5),(5,10)],
    'activation': ['relu'],
    'solver': ['adam'],
    'learning_rate':['invscaling']
}

gridSearch = GridSearchCV(MLPClassifier(), param_grid, cv=cv,
                          scoring='recall',verbose=2)
gridSearch.fit(X_train, y_train.replace({'yes':1,'no':0}))
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

Fitting 1 folds for each of 5 candidates, totalling 5 fits
[CV] END activation=relu, hidden_layer_sizes=5, learning_rate=invscaling, solver=adam; total time=   0.0s
[CV] END activation=relu, hidden_layer_sizes=10, learning_rate=invscaling, solver=adam; total time=   0.0s
[CV] END activation=relu, hidden_layer_sizes=15, learning_rate=invscaling, solver=adam; total time=   0.0s
[CV] END activation=relu, hidden_layer_sizes=(5, 5), learning_rate=invscaling, solver=adam; total time=   0.0s
[CV] END activation=relu, hidden_layer_sizes=(5, 10), learning_rate=invscaling, solver=adam; total time=   0.0s
Score:  0.0
Parameters:  {'activation': 'relu', 'hidden_layer_sizes': 5, 'learning_rate': 'invscaling', 'solver': 'adam'}


In [20]:
param_grid = {
    'hidden_layer_sizes':[5,10,15,(5,5),(5,10)],
    'activation': ['identity'],
    'solver': ['adam'],
    'alpha': [0,0.0005,0.0001,0.0005,0.001],
    'learning_rate':['invscaling'],
    'learning_rate_init': [0,0.0001,0.0005,0.001, 0.005,0.01],
    'max_iter': [200,500],
    'shuffle': [True,False] 
}

gridSearch = GridSearchCV(MLPClassifier(), param_grid, cv=cv,
                          scoring='recall',verbose=2)
gridSearch.fit(X_train, y_train.replace({'yes':1,'no':0}))
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

Fitting 1 folds for each of 600 candidates, totalling 600 fits
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling, learning_rate_init=0, max_iter=200, shuffle=True, solver=adam; total time=   0.0s
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling, learning_rate_init=0, max_iter=200, shuffle=False, solver=adam; total time=   0.0s
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling, learning_rate_init=0, max_iter=500, shuffle=True, solver=adam; total time=   0.0s
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling, learning_rate_init=0, max_iter=500, shuffle=False, solver=adam; total time=   0.0s
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling, learning_rate_init=0.0001, max_iter=200, shuffle=True, solver=adam; total time=   0.0s
[CV] END activation=identity, alpha=0, hidden_layer_sizes=5, learning_rate=invscaling

In [22]:
X_train_s, X_tune, y_train_s, y_tune = train_test_split(X_train, y_train,
                                                        test_size=0.2)
for i in range(15):
    mlp = MLPClassifier(hidden_layer_sizes=5,activation='relu',
                        learning_rate='invscaling', solver='adam',
                        random_state=i)
    
    mlp.fit(X_train_s, y_train_s)
    y_predict_mlp = mlp.predict(X_tune)

In [23]:
mlp = MLPClassifier(hidden_layer_sizes=5,activation='relu',
                        learning_rate='invscaling', solver='adam',
                        random_state=13)

mlp.fit(X_train_s, y_train_s)
y_test_predicted = mlp.predict(X_test)


In [26]:

#score
from sklearn.metrics import precision_score, accuracy_score, roc_auc_score, confusion_matrix,f1_score, roc_curve, auc, recall_score

# 績效
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_predicted))
print("Accuracy:", accuracy_score(y_test, y_test_predicted))

print("Precision Score",precision_score(y_test, y_test_predicted, average='macro'))
print("Precision Score (Micro", precision_score(y_test, y_test_predicted, average='micro'))
print("Precision Score (Weighted)",precision_score(y_test, y_test_predicted, average='weighted'))
print("Precision Score", precision_score(y_test, y_test_predicted, average=None))

print("Recall Score (Macro)", recall_score(y_test, y_test_predicted, average='macro',zero_division=1))
print("Recall Score (Micro)", recall_score(y_test, y_test_predicted, average='micro',zero_division=1))
print("Recall Score (Weight)",recall_score(y_test, y_test_predicted, average='weighted',zero_division=1))
print("Recall Score (Avg None)",recall_score(y_test, y_test_predicted, average=None,zero_division=1))


#print("Roc_Score:", roc_auc_score(y, clf.decision_function(X)))      
print("F1 Score (Macro):", f1_score(y_test, y_test_predicted, average='macro'))
print("F1 Score (Micro):", f1_score(y_test, y_test_predicted, average='micro'))
print("F1 Score (Weighted):", f1_score(y_test, y_test_predicted, average='weighted'))
print("F1 Score (Avg None):", f1_score(y_test, y_test_predicted, average=None))
print("F1 Score:", f1_score(y_test, y_test_predicted, zero_division=1))

Confusion Matrix:
 [[34  0]
 [ 5  0]]
Accuracy: 0.8717948717948718
Precision Score 0.4358974358974359
Precision Score (Micro 0.8717948717948718
Precision Score (Weighted) 0.760026298487837
Precision Score [0.87179487 0.        ]
Recall Score (Macro) 0.5
Recall Score (Micro) 0.8717948717948718
Recall Score (Weight) 0.8717948717948718
Recall Score (Avg None) [1. 0.]
F1 Score (Macro): 0.4657534246575343
F1 Score (Micro): 0.8717948717948718
F1 Score (Weighted): 0.8120828942746752
F1 Score (Avg None): [0.93150685 0.        ]
F1 Score: 0.0
