In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [None]:
import warnings
warnings.filterwarnings('ignore')

#### **Getting training & validation data**

In [9]:
# Read the training table from the CSV file in the working directory.
df_train = pd.read_csv(filepath_or_buffer='training.csv')

In [10]:
# Remove the 'IND_BOM_1_2' column; 'IND_BOM_1_1' is the column used as the target later on.
df_train = df_train.drop(columns=['IND_BOM_1_2'])

In [11]:
# Preview the first two rows to sanity-check the remaining columns.
df_train.head(n=2)

Unnamed: 0,UF_1,UF_2,UF_3,UF_4,UF_5,UF_6,UF_7,IDADE,SEXO_1,NIVEL_RELACIONAMENTO_CREDITO01,...,CEP4_6,CEP4_7,CEP4_8,CEP4_9,CEP4_10,CEP4_11,CEP4_12,CEP4_13,CEP4_14,IND_BOM_1_1
0,1,0,0,1,1,0,0,0.53839,0,0.111111,...,0,1,0,0,0,1,0,1,1,1
1,1,1,1,0,0,0,0,0.861432,1,0.111111,...,0,1,1,0,0,0,0,0,1,1


In [14]:
# Feature matrix: every column except the target.
X = df_train.drop(columns=['IND_BOM_1_1'])
# Target vector: the 'IND_BOM_1_1' column.
y = df_train['IND_BOM_1_1']

In [None]:
# Hold out a stratified 20% of the rows for validation. Building the validation
# set from the full training frame would make it identical to the training data,
# so validation metrics could never reveal overfitting. This cell also defines
# X_train / y_train, which the grid search and training cells rely on.
# NOTE(review): if a dedicated validation file exists, load it here instead of
# splitting and set X_train, y_train = X, y.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=1
)

#### **Parameter selection**

In [None]:
# Hyperparameter search space handed to GridSearchCV: every combination of the
# values below is evaluated.
param_grid = dict(
    hidden_layer_sizes=[(width,) for width in (32, 64, 128)],  # one hidden layer
    alpha=[0.001, 0.01, 0.46],
    max_iter=[100, 150, 200],
    batch_size=[32, 64, 128],
    solver=['sgd', 'adam'],
)

In [None]:
# Base estimator for the search. It must be created here: no earlier cell
# defines `mlp`. GridSearchCV clones it for every fit, so `mlp` itself stays
# unfitted. The fixed seed keeps the search reproducible.
mlp = MLPClassifier(random_state=1)

# Exhaustive search over `param_grid`, scored by accuracy with 5-fold
# cross-validation on the training data.
grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

In [None]:
# Hyperparameter combination that obtained the best mean cross-validated score
# in the grid search; reused below to build the final model.
best_parameters = grid_search.best_params_
best_parameters

In [None]:
# Mean cross-validated score (accuracy, per the `scoring` argument of the
# search) of the best parameter combination.
grid_search.best_score_

#### **Model training**

In [21]:
# Final network: the hyperparameters chosen by the grid search plus a fixed
# seed so weight initialisation and shuffling are reproducible.
mlp_net = MLPClassifier(**best_parameters, random_state=1)

In [None]:
# Train the final model on the training data. `fit` requires the feature
# matrix and the targets; calling it without arguments raises a TypeError.
mlp_net.fit(X_train, y_train)

In [None]:
# Epoch-by-epoch training run used to record learning curves.
#
# A fresh, unfitted network with the grid-search hyperparameters is created
# here so the curves start from initialisation. `partial_fit` performs a single
# pass over the data per call, which lets us inspect the model after every epoch.
mlp = MLPClassifier(random_state=1, **best_parameters)
classes = np.unique(y_train)  # partial_fit needs the full label set up front

train_losses, val_losses = [], []
train_accs, val_accs = [], []
grad_norms = []

for epoch in range(mlp.max_iter):
    mlp.partial_fit(X_train, y_train, classes=classes)

    # NOTE: scikit-learn does not expose the gradients. This is the norm of the
    # first-layer weight matrix, which only indirectly reflects exploding or
    # vanishing updates. The list name is kept for the plotting cell.
    grad_norms.append(np.linalg.norm(mlp.coefs_[0]))

    # Training loss reported by the estimator for this epoch.
    train_losses.append(mlp.loss_)

    # Validation loss measured as cross-entropy on predicted probabilities so it
    # is on the same scale as the training loss. The squared error of hard label
    # predictions is just the error rate and is not comparable.
    val_proba = mlp.predict_proba(X_val)
    val_losses.append(log_loss(y_val, val_proba, labels=mlp.classes_))

    train_accs.append(mlp.score(X_train, y_train))
    val_accs.append(mlp.score(X_val, y_val))

#### **Model analysis**

##### Weight Norm: Plot the norm of the first-layer weights over time as a rough indicator of exploding or vanishing updates (scikit-learn does not expose the gradients themselves)

In [None]:
# `grad_norms` holds the norm of the first-layer weight matrix recorded after
# each epoch (not an actual gradient), so the plot is labelled accordingly.
fig, ax = plt.subplots()
ax.plot(grad_norms, label='First-layer weight norm')
ax.set_xlabel('Epoch')
ax.set_ylabel('Weight Norm')
ax.legend()  # without this call the curve label is never displayed
plt.show()

##### Training and Validation Loss

###### explanation

In [None]:
# Training vs. validation loss per epoch. The second curve is computed on the
# validation set, so it is labelled as such (consistent with the accuracy plot).
fig, ax = plt.subplots()
ax.plot(train_losses, label='Training Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

##### Training and Validation Accuracy: Plot the training and validation accuracy over time to see how well the model is classifying the examples.

In [None]:
# Draw both accuracy curves on the current axes, one line per data split.
for curve, curve_label in ((train_accs, 'Training Accuracy'),
                           (val_accs, 'Validation Accuracy')):
    plt.plot(curve, label=curve_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

##### Confusion Matrix

###### explanation

In [None]:
# Predict on the validation set and tabulate true vs. predicted labels.
y_pred = mlp.predict(X_val)

# Pass the label order explicitly so rows/columns line up with the tick labels,
# and size the plot from the actual number of classes instead of assuming 10.
class_labels = mlp.classes_
n_classes = len(class_labels)
conf_matrix = confusion_matrix(y_val, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(conf_matrix, cmap='Blues')
ax.set_xticks(range(n_classes))
ax.set_yticks(range(n_classes))
ax.set_xticklabels(class_labels)
ax.set_yticklabels(class_labels)

# Annotate each cell with its count. White text is only readable on the dark
# end of the colormap, so switch to black for low-count (pale) cells.
threshold = conf_matrix.max() / 2
for i in range(n_classes):
    for j in range(n_classes):
        count = conf_matrix[i, j]
        ax.text(j, i, count, ha='center', va='center',
                color='white' if count > threshold else 'black')

ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix')
plt.show()